1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Stream Parser |
4 | * |
5 | * Copyright (c) 2016 Tom Herbert <tom@herbertland.com> |
6 | */ |
7 | |
8 | #include <linux/bpf.h> |
9 | #include <linux/errno.h> |
10 | #include <linux/errqueue.h> |
11 | #include <linux/file.h> |
12 | #include <linux/in.h> |
13 | #include <linux/kernel.h> |
14 | #include <linux/export.h> |
15 | #include <linux/init.h> |
16 | #include <linux/net.h> |
17 | #include <linux/netdevice.h> |
18 | #include <linux/poll.h> |
19 | #include <linux/rculist.h> |
20 | #include <linux/skbuff.h> |
21 | #include <linux/socket.h> |
22 | #include <linux/uaccess.h> |
23 | #include <linux/workqueue.h> |
24 | #include <net/strparser.h> |
25 | #include <net/netns/generic.h> |
26 | #include <net/sock.h> |
27 | |
/* Workqueue shared by all stream parsers in this file. It is created as a
 * single-threaded queue named "kstrp" in strp_dev_init() and runs both the
 * deferred receive work (strp->work) and the message assembly timer
 * (strp->msg_timer_work).
 */
static struct workqueue_struct *strp_wq;
29 | |
30 | static inline struct _strp_msg *_strp_msg(struct sk_buff *skb) |
31 | { |
32 | return (struct _strp_msg *)((void *)skb->cb + |
33 | offsetof(struct sk_skb_cb, strp)); |
34 | } |
35 | |
36 | /* Lower lock held */ |
37 | static void strp_abort_strp(struct strparser *strp, int err) |
38 | { |
39 | /* Unrecoverable error in receive */ |
40 | |
41 | cancel_delayed_work(dwork: &strp->msg_timer_work); |
42 | |
43 | if (strp->stopped) |
44 | return; |
45 | |
46 | strp->stopped = 1; |
47 | |
48 | if (strp->sk) { |
49 | struct sock *sk = strp->sk; |
50 | |
51 | /* Report an error on the lower socket */ |
52 | sk->sk_err = -err; |
53 | sk_error_report(sk); |
54 | } |
55 | } |
56 | |
57 | static void strp_start_timer(struct strparser *strp, long timeo) |
58 | { |
59 | if (timeo && timeo != LONG_MAX) |
60 | mod_delayed_work(wq: strp_wq, dwork: &strp->msg_timer_work, delay: timeo); |
61 | } |
62 | |
63 | /* Lower lock held */ |
64 | static void strp_parser_err(struct strparser *strp, int err, |
65 | read_descriptor_t *desc) |
66 | { |
67 | desc->error = err; |
68 | kfree_skb(skb: strp->skb_head); |
69 | strp->skb_head = NULL; |
70 | strp->cb.abort_parser(strp, err); |
71 | } |
72 | |
73 | static inline int strp_peek_len(struct strparser *strp) |
74 | { |
75 | if (strp->sk) { |
76 | struct socket *sock = strp->sk->sk_socket; |
77 | |
78 | return sock->ops->peek_len(sock); |
79 | } |
80 | |
81 | /* If we don't have an associated socket there's nothing to peek. |
82 | * Return int max to avoid stopping the strparser. |
83 | */ |
84 | |
85 | return INT_MAX; |
86 | } |
87 | |
88 | /* Lower socket lock held */ |
89 | static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, |
90 | unsigned int orig_offset, size_t orig_len, |
91 | size_t max_msg_size, long timeo) |
92 | { |
93 | struct strparser *strp = (struct strparser *)desc->arg.data; |
94 | struct _strp_msg *stm; |
95 | struct sk_buff *head, *skb; |
96 | size_t eaten = 0, cand_len; |
97 | ssize_t ; |
98 | int err; |
99 | bool cloned_orig = false; |
100 | |
101 | if (strp->paused) |
102 | return 0; |
103 | |
104 | head = strp->skb_head; |
105 | if (head) { |
106 | /* Message already in progress */ |
107 | if (unlikely(orig_offset)) { |
108 | /* Getting data with a non-zero offset when a message is |
109 | * in progress is not expected. If it does happen, we |
110 | * need to clone and pull since we can't deal with |
111 | * offsets in the skbs for a message expect in the head. |
112 | */ |
113 | orig_skb = skb_clone(skb: orig_skb, GFP_ATOMIC); |
114 | if (!orig_skb) { |
115 | STRP_STATS_INCR(strp->stats.mem_fail); |
116 | desc->error = -ENOMEM; |
117 | return 0; |
118 | } |
119 | if (!pskb_pull(skb: orig_skb, len: orig_offset)) { |
120 | STRP_STATS_INCR(strp->stats.mem_fail); |
121 | kfree_skb(skb: orig_skb); |
122 | desc->error = -ENOMEM; |
123 | return 0; |
124 | } |
125 | cloned_orig = true; |
126 | orig_offset = 0; |
127 | } |
128 | |
129 | if (!strp->skb_nextp) { |
130 | /* We are going to append to the frags_list of head. |
131 | * Need to unshare the frag_list. |
132 | */ |
133 | err = skb_unclone(skb: head, GFP_ATOMIC); |
134 | if (err) { |
135 | STRP_STATS_INCR(strp->stats.mem_fail); |
136 | desc->error = err; |
137 | return 0; |
138 | } |
139 | |
140 | if (unlikely(skb_shinfo(head)->frag_list)) { |
141 | /* We can't append to an sk_buff that already |
142 | * has a frag_list. We create a new head, point |
143 | * the frag_list of that to the old head, and |
144 | * then are able to use the old head->next for |
145 | * appending to the message. |
146 | */ |
147 | if (WARN_ON(head->next)) { |
148 | desc->error = -EINVAL; |
149 | return 0; |
150 | } |
151 | |
152 | skb = alloc_skb_for_msg(first: head); |
153 | if (!skb) { |
154 | STRP_STATS_INCR(strp->stats.mem_fail); |
155 | desc->error = -ENOMEM; |
156 | return 0; |
157 | } |
158 | |
159 | strp->skb_nextp = &head->next; |
160 | strp->skb_head = skb; |
161 | head = skb; |
162 | } else { |
163 | strp->skb_nextp = |
164 | &skb_shinfo(head)->frag_list; |
165 | } |
166 | } |
167 | } |
168 | |
169 | while (eaten < orig_len) { |
170 | /* Always clone since we will consume something */ |
171 | skb = skb_clone(skb: orig_skb, GFP_ATOMIC); |
172 | if (!skb) { |
173 | STRP_STATS_INCR(strp->stats.mem_fail); |
174 | desc->error = -ENOMEM; |
175 | break; |
176 | } |
177 | |
178 | cand_len = orig_len - eaten; |
179 | |
180 | head = strp->skb_head; |
181 | if (!head) { |
182 | head = skb; |
183 | strp->skb_head = head; |
184 | /* Will set skb_nextp on next packet if needed */ |
185 | strp->skb_nextp = NULL; |
186 | stm = _strp_msg(skb: head); |
187 | memset(stm, 0, sizeof(*stm)); |
188 | stm->strp.offset = orig_offset + eaten; |
189 | } else { |
190 | /* Unclone if we are appending to an skb that we |
191 | * already share a frag_list with. |
192 | */ |
193 | if (skb_has_frag_list(skb)) { |
194 | err = skb_unclone(skb, GFP_ATOMIC); |
195 | if (err) { |
196 | STRP_STATS_INCR(strp->stats.mem_fail); |
197 | desc->error = err; |
198 | break; |
199 | } |
200 | } |
201 | |
202 | stm = _strp_msg(skb: head); |
203 | *strp->skb_nextp = skb; |
204 | strp->skb_nextp = &skb->next; |
205 | head->data_len += skb->len; |
206 | head->len += skb->len; |
207 | head->truesize += skb->truesize; |
208 | } |
209 | |
210 | if (!stm->strp.full_len) { |
211 | ssize_t len; |
212 | |
213 | len = (*strp->cb.parse_msg)(strp, head); |
214 | |
215 | if (!len) { |
216 | /* Need more header to determine length */ |
217 | if (!stm->accum_len) { |
218 | /* Start RX timer for new message */ |
219 | strp_start_timer(strp, timeo); |
220 | } |
221 | stm->accum_len += cand_len; |
222 | eaten += cand_len; |
223 | STRP_STATS_INCR(strp->stats.need_more_hdr); |
224 | WARN_ON(eaten != orig_len); |
225 | break; |
226 | } else if (len < 0) { |
227 | if (len == -ESTRPIPE && stm->accum_len) { |
228 | len = -ENODATA; |
229 | strp->unrecov_intr = 1; |
230 | } else { |
231 | strp->interrupted = 1; |
232 | } |
233 | strp_parser_err(strp, err: len, desc); |
234 | break; |
235 | } else if (len > max_msg_size) { |
236 | /* Message length exceeds maximum allowed */ |
237 | STRP_STATS_INCR(strp->stats.msg_too_big); |
238 | strp_parser_err(strp, err: -EMSGSIZE, desc); |
239 | break; |
240 | } else if (len <= (ssize_t)head->len - |
241 | skb->len - stm->strp.offset) { |
242 | /* Length must be into new skb (and also |
243 | * greater than zero) |
244 | */ |
245 | STRP_STATS_INCR(strp->stats.bad_hdr_len); |
246 | strp_parser_err(strp, err: -EPROTO, desc); |
247 | break; |
248 | } |
249 | |
250 | stm->strp.full_len = len; |
251 | } |
252 | |
253 | extra = (ssize_t)(stm->accum_len + cand_len) - |
254 | stm->strp.full_len; |
255 | |
256 | if (extra < 0) { |
257 | /* Message not complete yet. */ |
258 | if (stm->strp.full_len - stm->accum_len > |
259 | strp_peek_len(strp)) { |
260 | /* Don't have the whole message in the socket |
261 | * buffer. Set strp->need_bytes to wait for |
262 | * the rest of the message. Also, set "early |
263 | * eaten" since we've already buffered the skb |
264 | * but don't consume yet per strp_read_sock. |
265 | */ |
266 | |
267 | if (!stm->accum_len) { |
268 | /* Start RX timer for new message */ |
269 | strp_start_timer(strp, timeo); |
270 | } |
271 | |
272 | stm->accum_len += cand_len; |
273 | eaten += cand_len; |
274 | strp->need_bytes = stm->strp.full_len - |
275 | stm->accum_len; |
276 | STRP_STATS_ADD(strp->stats.bytes, cand_len); |
277 | desc->count = 0; /* Stop reading socket */ |
278 | break; |
279 | } |
280 | stm->accum_len += cand_len; |
281 | eaten += cand_len; |
282 | WARN_ON(eaten != orig_len); |
283 | break; |
284 | } |
285 | |
286 | /* Positive extra indicates more bytes than needed for the |
287 | * message |
288 | */ |
289 | |
290 | WARN_ON(extra > cand_len); |
291 | |
292 | eaten += (cand_len - extra); |
293 | |
294 | /* Hurray, we have a new message! */ |
295 | cancel_delayed_work(dwork: &strp->msg_timer_work); |
296 | strp->skb_head = NULL; |
297 | strp->need_bytes = 0; |
298 | STRP_STATS_INCR(strp->stats.msgs); |
299 | |
300 | /* Give skb to upper layer */ |
301 | strp->cb.rcv_msg(strp, head); |
302 | |
303 | if (unlikely(strp->paused)) { |
304 | /* Upper layer paused strp */ |
305 | break; |
306 | } |
307 | } |
308 | |
309 | if (cloned_orig) |
310 | kfree_skb(skb: orig_skb); |
311 | |
312 | STRP_STATS_ADD(strp->stats.bytes, eaten); |
313 | |
314 | return eaten; |
315 | } |
316 | |
317 | int strp_process(struct strparser *strp, struct sk_buff *orig_skb, |
318 | unsigned int orig_offset, size_t orig_len, |
319 | size_t max_msg_size, long timeo) |
320 | { |
321 | read_descriptor_t desc; /* Dummy arg to strp_recv */ |
322 | |
323 | desc.arg.data = strp; |
324 | |
325 | return __strp_recv(desc: &desc, orig_skb, orig_offset, orig_len, |
326 | max_msg_size, timeo); |
327 | } |
328 | EXPORT_SYMBOL_GPL(strp_process); |
329 | |
330 | static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, |
331 | unsigned int orig_offset, size_t orig_len) |
332 | { |
333 | struct strparser *strp = (struct strparser *)desc->arg.data; |
334 | |
335 | return __strp_recv(desc, orig_skb, orig_offset, orig_len, |
336 | max_msg_size: strp->sk->sk_rcvbuf, timeo: strp->sk->sk_rcvtimeo); |
337 | } |
338 | |
/* Default read_sock_done callback: pass the read error through unchanged. */
static int default_read_sock_done(struct strparser *strp, int err)
{
	(void)strp;

	return err;
}
343 | |
344 | /* Called with lock held on lower socket */ |
345 | static int strp_read_sock(struct strparser *strp) |
346 | { |
347 | struct socket *sock = strp->sk->sk_socket; |
348 | read_descriptor_t desc; |
349 | |
350 | if (unlikely(!sock || !sock->ops || !sock->ops->read_sock)) |
351 | return -EBUSY; |
352 | |
353 | desc.arg.data = strp; |
354 | desc.error = 0; |
355 | desc.count = 1; /* give more than one skb per call */ |
356 | |
357 | /* sk should be locked here, so okay to do read_sock */ |
358 | sock->ops->read_sock(strp->sk, &desc, strp_recv); |
359 | |
360 | desc.error = strp->cb.read_sock_done(strp, desc.error); |
361 | |
362 | return desc.error; |
363 | } |
364 | |
365 | /* Lower sock lock held */ |
366 | void strp_data_ready(struct strparser *strp) |
367 | { |
368 | if (unlikely(strp->stopped) || strp->paused) |
369 | return; |
370 | |
371 | /* This check is needed to synchronize with do_strp_work. |
372 | * do_strp_work acquires a process lock (lock_sock) whereas |
373 | * the lock held here is bh_lock_sock. The two locks can be |
374 | * held by different threads at the same time, but bh_lock_sock |
375 | * allows a thread in BH context to safely check if the process |
376 | * lock is held. In this case, if the lock is held, queue work. |
377 | */ |
378 | if (sock_owned_by_user_nocheck(sk: strp->sk)) { |
379 | queue_work(wq: strp_wq, work: &strp->work); |
380 | return; |
381 | } |
382 | |
383 | if (strp->need_bytes) { |
384 | if (strp_peek_len(strp) < strp->need_bytes) |
385 | return; |
386 | } |
387 | |
388 | if (strp_read_sock(strp) == -ENOMEM) |
389 | queue_work(wq: strp_wq, work: &strp->work); |
390 | } |
391 | EXPORT_SYMBOL_GPL(strp_data_ready); |
392 | |
393 | static void do_strp_work(struct strparser *strp) |
394 | { |
395 | /* We need the read lock to synchronize with strp_data_ready. We |
396 | * need the socket lock for calling strp_read_sock. |
397 | */ |
398 | strp->cb.lock(strp); |
399 | |
400 | if (unlikely(strp->stopped)) |
401 | goto out; |
402 | |
403 | if (strp->paused) |
404 | goto out; |
405 | |
406 | if (strp_read_sock(strp) == -ENOMEM) |
407 | queue_work(wq: strp_wq, work: &strp->work); |
408 | |
409 | out: |
410 | strp->cb.unlock(strp); |
411 | } |
412 | |
413 | static void strp_work(struct work_struct *w) |
414 | { |
415 | do_strp_work(container_of(w, struct strparser, work)); |
416 | } |
417 | |
418 | static void strp_msg_timeout(struct work_struct *w) |
419 | { |
420 | struct strparser *strp = container_of(w, struct strparser, |
421 | msg_timer_work.work); |
422 | |
423 | /* Message assembly timed out */ |
424 | STRP_STATS_INCR(strp->stats.msg_timeouts); |
425 | strp->cb.lock(strp); |
426 | strp->cb.abort_parser(strp, -ETIMEDOUT); |
427 | strp->cb.unlock(strp); |
428 | } |
429 | |
430 | static void strp_sock_lock(struct strparser *strp) |
431 | { |
432 | lock_sock(sk: strp->sk); |
433 | } |
434 | |
435 | static void strp_sock_unlock(struct strparser *strp) |
436 | { |
437 | release_sock(sk: strp->sk); |
438 | } |
439 | |
440 | int strp_init(struct strparser *strp, struct sock *sk, |
441 | const struct strp_callbacks *cb) |
442 | { |
443 | |
444 | if (!cb || !cb->rcv_msg || !cb->parse_msg) |
445 | return -EINVAL; |
446 | |
447 | /* The sk (sock) arg determines the mode of the stream parser. |
448 | * |
449 | * If the sock is set then the strparser is in receive callback mode. |
450 | * The upper layer calls strp_data_ready to kick receive processing |
451 | * and strparser calls the read_sock function on the socket to |
452 | * get packets. |
453 | * |
454 | * If the sock is not set then the strparser is in general mode. |
455 | * The upper layer calls strp_process for each skb to be parsed. |
456 | */ |
457 | |
458 | if (!sk) { |
459 | if (!cb->lock || !cb->unlock) |
460 | return -EINVAL; |
461 | } |
462 | |
463 | memset(strp, 0, sizeof(*strp)); |
464 | |
465 | strp->sk = sk; |
466 | |
467 | strp->cb.lock = cb->lock ? : strp_sock_lock; |
468 | strp->cb.unlock = cb->unlock ? : strp_sock_unlock; |
469 | strp->cb.rcv_msg = cb->rcv_msg; |
470 | strp->cb.parse_msg = cb->parse_msg; |
471 | strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; |
472 | strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp; |
473 | |
474 | INIT_DELAYED_WORK(&strp->msg_timer_work, strp_msg_timeout); |
475 | INIT_WORK(&strp->work, strp_work); |
476 | |
477 | return 0; |
478 | } |
479 | EXPORT_SYMBOL_GPL(strp_init); |
480 | |
481 | /* Sock process lock held (lock_sock) */ |
482 | void __strp_unpause(struct strparser *strp) |
483 | { |
484 | strp->paused = 0; |
485 | |
486 | if (strp->need_bytes) { |
487 | if (strp_peek_len(strp) < strp->need_bytes) |
488 | return; |
489 | } |
490 | strp_read_sock(strp); |
491 | } |
492 | EXPORT_SYMBOL_GPL(__strp_unpause); |
493 | |
494 | void strp_unpause(struct strparser *strp) |
495 | { |
496 | strp->paused = 0; |
497 | |
498 | /* Sync setting paused with RX work */ |
499 | smp_mb(); |
500 | |
501 | queue_work(wq: strp_wq, work: &strp->work); |
502 | } |
503 | EXPORT_SYMBOL_GPL(strp_unpause); |
504 | |
505 | /* strp must already be stopped so that strp_recv will no longer be called. |
506 | * Note that strp_done is not called with the lower socket held. |
507 | */ |
508 | void strp_done(struct strparser *strp) |
509 | { |
510 | WARN_ON(!strp->stopped); |
511 | |
512 | cancel_delayed_work_sync(dwork: &strp->msg_timer_work); |
513 | cancel_work_sync(work: &strp->work); |
514 | |
515 | if (strp->skb_head) { |
516 | kfree_skb(skb: strp->skb_head); |
517 | strp->skb_head = NULL; |
518 | } |
519 | } |
520 | EXPORT_SYMBOL_GPL(strp_done); |
521 | |
522 | void strp_stop(struct strparser *strp) |
523 | { |
524 | strp->stopped = 1; |
525 | } |
526 | EXPORT_SYMBOL_GPL(strp_stop); |
527 | |
528 | void strp_check_rcv(struct strparser *strp) |
529 | { |
530 | queue_work(wq: strp_wq, work: &strp->work); |
531 | } |
532 | EXPORT_SYMBOL_GPL(strp_check_rcv); |
533 | |
534 | static int __init strp_dev_init(void) |
535 | { |
536 | BUILD_BUG_ON(sizeof(struct sk_skb_cb) > |
537 | sizeof_field(struct sk_buff, cb)); |
538 | |
539 | strp_wq = create_singlethread_workqueue("kstrp" ); |
540 | if (unlikely(!strp_wq)) |
541 | return -ENOMEM; |
542 | |
543 | return 0; |
544 | } |
545 | device_initcall(strp_dev_init); |
546 | |