// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/rculist.h>
#include <linux/list.h>
#include <linux/hash.h>
#include <linux/types.h>
#include <linux/spinlock.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/bpf_local_storage.h>
#include <net/bpf_sk_storage.h>
#include <net/sock.h>
#include <uapi/linux/sock_diag.h>
#include <uapi/linux/btf.h>
#include <linux/rcupdate_trace.h>

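/* DEFINE_BPF_STORAGE_CACHE() below instantiates the per-map-type cache
 * bookkeeping shared by all sk storage maps: each map is assigned a slot
 * in the owner's local-storage cache array, so a repeated lookup can
 * usually be served from sk->sk_bpf_storage->cache[] without walking the
 * whole storage list.
 */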
DEFINE_BPF_STORAGE_CACHE(sk_cache);

static struct bpf_local_storage_data *
bpf_sk_storage_lookup(struct sock *sk, struct bpf_map *map, bool cacheit_lockit)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_map *smap;

	sk_storage =
		rcu_dereference_check(sk->sk_bpf_storage, bpf_rcu_lock_held());
	if (!sk_storage)
		return NULL;

	smap = (struct bpf_local_storage_map *)map;
	return bpf_local_storage_lookup(sk_storage, smap, cacheit_lockit);
}

static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
{
	struct bpf_local_storage_data *sdata;

	sdata = bpf_sk_storage_lookup(sk, map, false);
	if (!sdata)
		return -ENOENT;

	bpf_selem_unlink(SELEM(sdata), false);

	return 0;
}

/* Called by __sk_destruct() & bpf_sk_storage_clone() */
void bpf_sk_storage_free(struct sock *sk)
{
	struct bpf_local_storage *sk_storage;

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage) {
		rcu_read_unlock();
		return;
	}

	bpf_local_storage_destroy(sk_storage);
	rcu_read_unlock();
}

static void bpf_sk_storage_map_free(struct bpf_map *map)
{
	bpf_local_storage_map_free(map, &sk_cache, NULL);
}

static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
{
	return bpf_local_storage_map_alloc(attr, &sk_cache, false);
}

static int notsupp_get_next_key(struct bpf_map *map, void *key,
				void *next_key)
{
	return -ENOTSUPP;
}

static void *bpf_fd_sk_storage_lookup_elem(struct bpf_map *map, void *key)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_sk_storage_lookup(sock->sk, map, true);
		sockfd_put(sock);
		return sdata ? sdata->data : NULL;
	}

	return ERR_PTR(err);
}

static long bpf_fd_sk_storage_update_elem(struct bpf_map *map, void *key,
					  void *value, u64 map_flags)
{
	struct bpf_local_storage_data *sdata;
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		sdata = bpf_local_storage_update(
			sock->sk, (struct bpf_local_storage_map *)map, value,
			map_flags, GFP_ATOMIC);
		sockfd_put(sock);
		return PTR_ERR_OR_ZERO(sdata);
	}

	return err;
}

static long bpf_fd_sk_storage_delete_elem(struct bpf_map *map, void *key)
{
	struct socket *sock;
	int fd, err;

	fd = *(int *)key;
	sock = sockfd_lookup(fd, &err);
	if (sock) {
		err = bpf_sk_storage_del(sock->sk, map);
		sockfd_put(sock);
		return err;
	}

	return err;
}
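
/* The three bpf_fd_sk_storage_*() handlers above serve the bpf(2) syscall
 * path, where the map key is a socket fd owned by the calling process.
 * A minimal userspace sketch with libbpf (map_fd, sock_fd and struct
 * my_val are illustrative, not defined here):
 *
 *	struct my_val v = { .cnt = 1 };
 *
 *	bpf_map_update_elem(map_fd, &sock_fd, &v, BPF_ANY);  // create/update
 *	bpf_map_lookup_elem(map_fd, &sock_fd, &v);           // read back
 *	bpf_map_delete_elem(map_fd, &sock_fd);               // remove
 */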

static struct bpf_local_storage_elem *
bpf_sk_storage_clone_elem(struct sock *newsk,
			  struct bpf_local_storage_map *smap,
			  struct bpf_local_storage_elem *selem)
{
	struct bpf_local_storage_elem *copy_selem;

	copy_selem = bpf_selem_alloc(smap, newsk, NULL, true, GFP_ATOMIC);
	if (!copy_selem)
		return NULL;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, SDATA(copy_selem)->data,
				      SDATA(selem)->data, true);
	else
		copy_map_value(&smap->map, SDATA(copy_selem)->data,
			       SDATA(selem)->data);

	return copy_selem;
}

int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk)
{
	struct bpf_local_storage *new_sk_storage = NULL;
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	int ret = 0;

	RCU_INIT_POINTER(newsk->sk_bpf_storage, NULL);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);

	if (!sk_storage || hlist_empty(&sk_storage->list))
		goto out;

	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		struct bpf_local_storage_elem *copy_selem;
		struct bpf_local_storage_map *smap;
		struct bpf_map *map;

		smap = rcu_dereference(SDATA(selem)->smap);
		if (!(smap->map.map_flags & BPF_F_CLONE))
			continue;

		/* Note that for lockless listeners, adding a new element
		 * here can race with the cleanup in
		 * bpf_local_storage_map_free().  Try to grab the map refcnt
		 * to make sure the map is still alive and to prevent its
		 * concurrent removal.
		 */
		map = bpf_map_inc_not_zero(&smap->map);
		if (IS_ERR(map))
			continue;

		copy_selem = bpf_sk_storage_clone_elem(newsk, smap, selem);
		if (!copy_selem) {
			ret = -ENOMEM;
			bpf_map_put(map);
			goto out;
		}

		if (new_sk_storage) {
			bpf_selem_link_map(smap, copy_selem);
			bpf_selem_link_storage_nolock(new_sk_storage, copy_selem);
		} else {
			ret = bpf_local_storage_alloc(newsk, smap, copy_selem, GFP_ATOMIC);
			if (ret) {
				bpf_selem_free(copy_selem, smap, true);
				atomic_sub(smap->elem_size,
					   &newsk->sk_omem_alloc);
				bpf_map_put(map);
				goto out;
			}

			new_sk_storage =
				rcu_dereference(copy_selem->local_storage);
		}
		bpf_map_put(map);
	}

out:
	rcu_read_unlock();

	/* In case of an error, don't free anything explicitly here; the
	 * caller is responsible for calling bpf_sk_storage_free().
	 */

	return ret;
}
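
/* bpf_sk_storage_clone() only copies storage created from maps carrying
 * the BPF_F_CLONE flag, so that storage attached to a listener follows
 * the child socket returned by accept().  A userspace sketch with libbpf
 * (the map name and struct my_val are illustrative):
 *
 *	LIBBPF_OPTS(bpf_map_create_opts, opts,
 *		    .map_flags = BPF_F_NO_PREALLOC | BPF_F_CLONE);
 *	int map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_stg",
 *				    sizeof(int), sizeof(struct my_val),
 *				    0, &opts);
 */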

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	struct bpf_local_storage_data *sdata;

	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk) || flags > BPF_SK_STORAGE_GET_F_CREATE)
		return (unsigned long)NULL;

	sdata = bpf_sk_storage_lookup(sk, map, true);
	if (sdata)
		return (unsigned long)sdata->data;

	if (flags == BPF_SK_STORAGE_GET_F_CREATE &&
	    /* Cannot add a new elem to a sock that is going away.
	     * Otherwise the new elem may be leaked (along with other
	     * memory problems during map destruction).
	     */
	    refcount_inc_not_zero(&sk->sk_refcnt)) {
		sdata = bpf_local_storage_update(
			sk, (struct bpf_local_storage_map *)map, value,
			BPF_NOEXIST, gfp_flags);
		/* sk must be a fullsock (guaranteed by verifier),
		 * so sock_gen_put() is unnecessary.
		 */
		sock_put(sk);
		return IS_ERR(sdata) ?
			(unsigned long)NULL : (unsigned long)sdata->data;
	}

	return (unsigned long)NULL;
}
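
/* Typical BPF-program-side use of the helper above (a sketch; the map
 * name and struct my_val are illustrative).  Note that sk storage maps
 * must be created with BPF_F_NO_PREALLOC:
 *
 *	struct {
 *		__uint(type, BPF_MAP_TYPE_SK_STORAGE);
 *		__uint(map_flags, BPF_F_NO_PREALLOC);
 *		__type(key, int);
 *		__type(value, struct my_val);
 *	} sk_stg_map SEC(".maps");
 *
 *	struct my_val *v;
 *
 *	v = bpf_sk_storage_get(&sk_stg_map, sk, NULL,
 *			       BPF_SK_STORAGE_GET_F_CREATE);
 *	if (v)
 *		__sync_fetch_and_add(&v->cnt, 1);
 */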

BPF_CALL_2(bpf_sk_storage_delete, struct bpf_map *, map, struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (!sk || !sk_fullsock(sk))
		return -EINVAL;

	if (refcount_inc_not_zero(&sk->sk_refcnt)) {
		int err;

		err = bpf_sk_storage_del(sk, map);
		sock_put(sk);
		return err;
	}

	return -ENOENT;
}

static int bpf_sk_storage_charge(struct bpf_local_storage_map *smap,
				 void *owner, u32 size)
{
	struct sock *sk = (struct sock *)owner;
	int optmem_max;

	optmem_max = READ_ONCE(sock_net(sk)->core.sysctl_optmem_max);
	/* same check as in sock_kmalloc() */
	if (size <= optmem_max &&
	    atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
		atomic_add(size, &sk->sk_omem_alloc);
		return 0;
	}

	return -ENOMEM;
}

static void bpf_sk_storage_uncharge(struct bpf_local_storage_map *smap,
				    void *owner, u32 size)
{
	struct sock *sk = owner;

	atomic_sub(size, &sk->sk_omem_alloc);
}

static struct bpf_local_storage __rcu **
bpf_sk_storage_ptr(void *owner)
{
	struct sock *sk = owner;

	return &sk->sk_bpf_storage;
}

const struct bpf_map_ops sk_storage_map_ops = {
	.map_meta_equal = bpf_map_meta_equal,
	.map_alloc_check = bpf_local_storage_map_alloc_check,
	.map_alloc = bpf_sk_storage_map_alloc,
	.map_free = bpf_sk_storage_map_free,
	.map_get_next_key = notsupp_get_next_key,
	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
	.map_update_elem = bpf_fd_sk_storage_update_elem,
	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
	.map_check_btf = bpf_local_storage_map_check_btf,
	.map_btf_id = &bpf_local_storage_map_btf_id[0],
	.map_local_storage_charge = bpf_sk_storage_charge,
	.map_local_storage_uncharge = bpf_sk_storage_uncharge,
	.map_owner_storage_ptr = bpf_sk_storage_ptr,
	.map_mem_usage = bpf_local_storage_map_mem_usage,
};

const struct bpf_func_proto bpf_sk_storage_get_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_get_cg_sock_proto = {
	.func = bpf_sk_storage_get,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_CTX, /* context is 'struct sock' */
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
};

const struct bpf_func_proto bpf_sk_storage_delete_proto = {
	.func = bpf_sk_storage_delete,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON,
};

static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog)
{
	const struct btf *btf_vmlinux;
	const struct btf_type *t;
	const char *tname;
	u32 btf_id;

	if (prog->aux->dst_prog)
		return false;

	/* Ensure the tracing program is not tracing
	 * any of the bpf_sk_storage*() functions while
	 * it is using the bpf_sk_storage_(get|delete)
	 * helpers.
	 */
	switch (prog->expected_attach_type) {
	case BPF_TRACE_ITER:
	case BPF_TRACE_RAW_TP:
		/* bpf_sk_storage has no trace point */
		return true;
	case BPF_TRACE_FENTRY:
	case BPF_TRACE_FEXIT:
		btf_vmlinux = bpf_get_btf_vmlinux();
		if (IS_ERR_OR_NULL(btf_vmlinux))
			return false;
		btf_id = prog->aux->attach_btf_id;
		t = btf_type_by_id(btf_vmlinux, btf_id);
		tname = btf_name_by_offset(btf_vmlinux, t->name_off);
		return !!strncmp(tname, "bpf_sk_storage",
				 strlen("bpf_sk_storage"));
	default:
		return false;
	}

	return false;
}

/* *gfp_flags* is a hidden argument provided by the verifier */
BPF_CALL_5(bpf_sk_storage_get_tracing, struct bpf_map *, map, struct sock *, sk,
	   void *, value, u64, flags, gfp_t, gfp_flags)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return (unsigned long)NULL;

	return (unsigned long)____bpf_sk_storage_get(map, sk, value, flags,
						     gfp_flags);
}

BPF_CALL_2(bpf_sk_storage_delete_tracing, struct bpf_map *, map,
	   struct sock *, sk)
{
	WARN_ON_ONCE(!bpf_rcu_lock_held());
	if (in_hardirq() || in_nmi())
		return -EPERM;

	return ____bpf_sk_storage_delete(map, sk);
}

const struct bpf_func_proto bpf_sk_storage_get_tracing_proto = {
	.func = bpf_sk_storage_get_tracing,
	.gpl_only = false,
	.ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL,
	.arg4_type = ARG_ANYTHING,
	.allowed = bpf_sk_storage_tracing_allowed,
};

const struct bpf_func_proto bpf_sk_storage_delete_tracing_proto = {
	.func = bpf_sk_storage_delete_tracing,
	.gpl_only = false,
	.ret_type = RET_INTEGER,
	.arg1_type = ARG_CONST_MAP_PTR,
	.arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL,
	.arg2_btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
	.allowed = bpf_sk_storage_tracing_allowed,
};

struct bpf_sk_storage_diag {
	u32 nr_maps;
	struct bpf_map *maps[];
};

/* The reply will be like:
 * INET_DIAG_BPF_SK_STORAGES (nla_nest)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	SK_DIAG_BPF_STORAGE (nla_nest)
 *		SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
 *		SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
 *	....
 */
static int nla_value_size(u32 value_size)
{
	/* SK_DIAG_BPF_STORAGE (nla_nest)
	 *	SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32)
	 *	SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit)
	 */
	return nla_total_size(0) + nla_total_size(sizeof(u32)) +
	       nla_total_size_64bit(value_size);
}
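
/* A rough worked example, assuming the usual 4-byte NLA_HDRLEN and a map
 * value_size of 8:  nla_total_size(0) + nla_total_size(4) +
 * nla_total_size_64bit(8) = 4 + 8 + 16 = 28 bytes per storage entry
 * (the 64bit variant also reserves room for a possible pad attribute).
 */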

void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag)
{
	u32 i;

	if (!diag)
		return;

	for (i = 0; i < diag->nr_maps; i++)
		bpf_map_put(diag->maps[i]);

	kfree(diag);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free);

static bool diag_check_dup(const struct bpf_sk_storage_diag *diag,
			   const struct bpf_map *map)
{
	u32 i;

	for (i = 0; i < diag->nr_maps; i++) {
		if (diag->maps[i] == map)
			return true;
	}

	return false;
}

struct bpf_sk_storage_diag *
bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
{
	struct bpf_sk_storage_diag *diag;
	struct nlattr *nla;
	u32 nr_maps = 0;
	int rem, err;

	/* bpf_local_storage_map is currently limited to CAP_SYS_ADMIN,
	 * mirroring the check done on the map_alloc_check() side.
	 */
	if (!bpf_capable())
		return ERR_PTR(-EPERM);

	nla_for_each_nested(nla, nla_stgs, rem) {
		if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) {
			if (nla_len(nla) != sizeof(u32))
				return ERR_PTR(-EINVAL);
			nr_maps++;
		}
	}

	diag = kzalloc(struct_size(diag, maps, nr_maps), GFP_KERNEL);
	if (!diag)
		return ERR_PTR(-ENOMEM);

	nla_for_each_nested(nla, nla_stgs, rem) {
		struct bpf_map *map;
		int map_fd;

		if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD)
			continue;

		map_fd = nla_get_u32(nla);
		map = bpf_map_get(map_fd);
		if (IS_ERR(map)) {
			err = PTR_ERR(map);
			goto err_free;
		}
		if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
			bpf_map_put(map);
			err = -EINVAL;
			goto err_free;
		}
		if (diag_check_dup(diag, map)) {
			bpf_map_put(map);
			err = -EEXIST;
			goto err_free;
		}
		diag->maps[diag->nr_maps++] = map;
	}

	return diag;

err_free:
	bpf_sk_storage_diag_free(diag);
	return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc);

static int diag_get(struct bpf_local_storage_data *sdata, struct sk_buff *skb)
{
	struct nlattr *nla_stg, *nla_value;
	struct bpf_local_storage_map *smap;

	/* The value size cannot exceed the max nlattr payload */
	BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < BPF_LOCAL_STORAGE_MAX_VALUE_SIZE);

	nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE);
	if (!nla_stg)
		return -EMSGSIZE;

	smap = rcu_dereference(sdata->smap);
	if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id))
		goto errout;

	nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE,
				      smap->map.value_size,
				      SK_DIAG_BPF_STORAGE_PAD);
	if (!nla_value)
		goto errout;

	if (btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))
		copy_map_value_locked(&smap->map, nla_data(nla_value),
				      sdata->data, true);
	else
		copy_map_value(&smap->map, nla_data(nla_value), sdata->data);

	nla_nest_end(skb, nla_stg);
	return 0;

errout:
	nla_nest_cancel(skb, nla_stg);
	return -EMSGSIZE;
}

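/* Dump every storage entry attached to @sk.  On an undersized skb the
 * walk deliberately keeps going without emitting more attributes, so
 * *res_diag_size still accumulates the full size the caller needs for a
 * retry (hence the "continue to learn diag_size" notes below).
 */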
static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb,
				       int stg_array_type,
				       unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map *smap;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;

	rcu_read_lock();

	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) {
		smap = rcu_dereference(SDATA(selem)->smap);
		diag_size += nla_value_size(smap->map.value_size);

		if (nla_stgs && diag_get(SDATA(selem), skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}

	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}

int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag,
			    struct sock *sk, struct sk_buff *skb,
			    int stg_array_type,
			    unsigned int *res_diag_size)
{
	/* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */
	unsigned int diag_size = nla_total_size(0);
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_data *sdata;
	struct nlattr *nla_stgs;
	unsigned int saved_len;
	int err = 0;
	u32 i;

	*res_diag_size = 0;

	/* No map has been specified. Dump all. */
	if (!diag->nr_maps)
		return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type,
						   res_diag_size);

	rcu_read_lock();
	sk_storage = rcu_dereference(sk->sk_bpf_storage);
	if (!sk_storage || hlist_empty(&sk_storage->list)) {
		rcu_read_unlock();
		return 0;
	}

	nla_stgs = nla_nest_start(skb, stg_array_type);
	if (!nla_stgs)
		/* Continue to learn diag_size */
		err = -EMSGSIZE;

	saved_len = skb->len;
	for (i = 0; i < diag->nr_maps; i++) {
		sdata = bpf_local_storage_lookup(sk_storage,
				(struct bpf_local_storage_map *)diag->maps[i],
				false);

		if (!sdata)
			continue;

		diag_size += nla_value_size(diag->maps[i]->value_size);

		if (nla_stgs && diag_get(sdata, skb))
			/* Continue to learn diag_size */
			err = -EMSGSIZE;
	}
	rcu_read_unlock();

	if (nla_stgs) {
		if (saved_len == skb->len)
			nla_nest_cancel(skb, nla_stgs);
		else
			nla_nest_end(skb, nla_stgs);
	}

	if (diag_size == nla_total_size(0)) {
		*res_diag_size = 0;
		return 0;
	}

	*res_diag_size = diag_size;
	return err;
}
EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put);

struct bpf_iter_seq_sk_storage_map_info {
	struct bpf_map *map;
	unsigned int bucket_id;
	unsigned skip_elems;
};

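/* Resumable iteration: the seq_file position is (bucket_id, skip_elems).
 * First try to advance past prev_selem within the same bucket; failing
 * that, scan forward bucket by bucket, skipping the first skip_elems
 * entries of the bucket being resumed.  Returns with rcu_read_lock()
 * held whenever a non-NULL element is returned.
 */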
static struct bpf_local_storage_elem *
bpf_sk_storage_map_seq_find_next(struct bpf_iter_seq_sk_storage_map_info *info,
				 struct bpf_local_storage_elem *prev_selem)
	__acquires(RCU) __releases(RCU)
{
	struct bpf_local_storage *sk_storage;
	struct bpf_local_storage_elem *selem;
	u32 skip_elems = info->skip_elems;
	struct bpf_local_storage_map *smap;
	u32 bucket_id = info->bucket_id;
	u32 i, count, n_buckets;
	struct bpf_local_storage_map_bucket *b;

	smap = (struct bpf_local_storage_map *)info->map;
	n_buckets = 1U << smap->bucket_log;
	if (bucket_id >= n_buckets)
		return NULL;

	/* try to find next selem in the same bucket */
	selem = prev_selem;
	count = 0;
	while (selem) {
		selem = hlist_entry_safe(rcu_dereference(hlist_next_rcu(&selem->map_node)),
					 struct bpf_local_storage_elem, map_node);
		if (!selem) {
			/* not found, unlock and go to the next bucket */
			b = &smap->buckets[bucket_id++];
			rcu_read_unlock();
			skip_elems = 0;
			break;
		}
		sk_storage = rcu_dereference(selem->local_storage);
		if (sk_storage) {
			info->skip_elems = skip_elems + count;
			return selem;
		}
		count++;
	}

	for (i = bucket_id; i < n_buckets; i++) {
		b = &smap->buckets[i];
		rcu_read_lock();
		count = 0;
		hlist_for_each_entry_rcu(selem, &b->list, map_node) {
			sk_storage = rcu_dereference(selem->local_storage);
			if (sk_storage && count >= skip_elems) {
				info->bucket_id = i;
				info->skip_elems = count;
				return selem;
			}
			count++;
		}
		rcu_read_unlock();
		skip_elems = 0;
	}

	info->bucket_id = i;
	info->skip_elems = 0;
	return NULL;
}

static void *bpf_sk_storage_map_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct bpf_local_storage_elem *selem;

	selem = bpf_sk_storage_map_seq_find_next(seq->private, NULL);
	if (!selem)
		return NULL;

	if (*pos == 0)
		++*pos;
	return selem;
}

static void *bpf_sk_storage_map_seq_next(struct seq_file *seq, void *v,
					 loff_t *pos)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;

	++*pos;
	++info->skip_elems;
	return bpf_sk_storage_map_seq_find_next(seq->private, v);
}

struct bpf_iter__bpf_sk_storage_map {
	__bpf_md_ptr(struct bpf_iter_meta *, meta);
	__bpf_md_ptr(struct bpf_map *, map);
	__bpf_md_ptr(struct sock *, sk);
	__bpf_md_ptr(void *, value);
};

DEFINE_BPF_ITER_FUNC(bpf_sk_storage_map, struct bpf_iter_meta *meta,
		     struct bpf_map *map, struct sock *sk,
		     void *value)

static int __bpf_sk_storage_map_seq_show(struct seq_file *seq,
					 struct bpf_local_storage_elem *selem)
{
	struct bpf_iter_seq_sk_storage_map_info *info = seq->private;
	struct bpf_iter__bpf_sk_storage_map ctx = {};
	struct bpf_local_storage *sk_storage;
	struct bpf_iter_meta meta;
	struct bpf_prog *prog;
	int ret = 0;

	meta.seq = seq;
	prog = bpf_iter_get_info(&meta, selem == NULL);
	if (prog) {
		ctx.meta = &meta;
		ctx.map = info->map;
		if (selem) {
			sk_storage = rcu_dereference(selem->local_storage);
			ctx.sk = sk_storage->owner;
			ctx.value = SDATA(selem)->data;
		}
		ret = bpf_iter_run_prog(prog, &ctx);
	}

	return ret;
}

static int bpf_sk_storage_map_seq_show(struct seq_file *seq, void *v)
{
	return __bpf_sk_storage_map_seq_show(seq, v);
}

static void bpf_sk_storage_map_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	if (!v)
		(void)__bpf_sk_storage_map_seq_show(seq, v);
	else
		rcu_read_unlock();
}

static int bpf_iter_init_sk_storage_map(void *priv_data,
					struct bpf_iter_aux_info *aux)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_inc_with_uref(aux->map);
	seq_info->map = aux->map;
	return 0;
}

static void bpf_iter_fini_sk_storage_map(void *priv_data)
{
	struct bpf_iter_seq_sk_storage_map_info *seq_info = priv_data;

	bpf_map_put_with_uref(seq_info->map);
}

static int bpf_iter_attach_map(struct bpf_prog *prog,
			       union bpf_iter_link_info *linfo,
			       struct bpf_iter_aux_info *aux)
{
	struct bpf_map *map;
	int err = -EINVAL;

	if (!linfo->map.map_fd)
		return -EBADF;

	map = bpf_map_get_with_uref(linfo->map.map_fd);
	if (IS_ERR(map))
		return PTR_ERR(map);

	if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
		goto put_map;

	if (prog->aux->max_rdwr_access > map->value_size) {
		err = -EACCES;
		goto put_map;
	}

	aux->map = map;
	return 0;

put_map:
	bpf_map_put_with_uref(map);
	return err;
}

static void bpf_iter_detach_map(struct bpf_iter_aux_info *aux)
{
	bpf_map_put_with_uref(aux->map);
}

static const struct seq_operations bpf_sk_storage_map_seq_ops = {
	.start = bpf_sk_storage_map_seq_start,
	.next = bpf_sk_storage_map_seq_next,
	.stop = bpf_sk_storage_map_seq_stop,
	.show = bpf_sk_storage_map_seq_show,
};

static const struct bpf_iter_seq_info iter_seq_info = {
	.seq_ops = &bpf_sk_storage_map_seq_ops,
	.init_seq_private = bpf_iter_init_sk_storage_map,
	.fini_seq_private = bpf_iter_fini_sk_storage_map,
	.seq_priv_size = sizeof(struct bpf_iter_seq_sk_storage_map_info),
};

static struct bpf_iter_reg bpf_sk_storage_map_reg_info = {
	.target = "bpf_sk_storage_map",
	.attach_target = bpf_iter_attach_map,
	.detach_target = bpf_iter_detach_map,
	.show_fdinfo = bpf_iter_map_show_fdinfo,
	.fill_link_info = bpf_iter_map_fill_link_info,
	.ctx_arg_info_size = 2,
	.ctx_arg_info = {
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
		  PTR_TO_BTF_ID_OR_NULL },
		{ offsetof(struct bpf_iter__bpf_sk_storage_map, value),
		  PTR_TO_BUF | PTR_MAYBE_NULL },
	},
	.seq_info = &iter_seq_info,
};

static int __init bpf_sk_storage_map_iter_init(void)
{
	bpf_sk_storage_map_reg_info.ctx_arg_info[0].btf_id =
		btf_sock_ids[BTF_SOCK_TYPE_SOCK];
	return bpf_iter_reg_target(&bpf_sk_storage_map_reg_info);
}
late_initcall(bpf_sk_storage_map_iter_init);
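
/* With the "bpf_sk_storage_map" target registered above, a storage map
 * can be walked from a BPF iterator program (a sketch; struct my_val and
 * the program name are illustrative):
 *
 *	SEC("iter/bpf_sk_storage_map")
 *	int dump_sk_stg(struct bpf_iter__bpf_sk_storage_map *ctx)
 *	{
 *		struct sock *sk = ctx->sk;
 *		struct my_val *v = ctx->value;
 *
 *		if (!sk || !v)
 *			return 0;
 *		BPF_SEQ_PRINTF(ctx->meta->seq, "cnt: %d\n", v->cnt);
 *		return 0;
 *	}
 *
 * The iterator link is created with a bpf_iter_link_info carrying the
 * target map fd (see bpf_iter_attach_map() above).
 */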
932 | |