1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Management Component Transport Protocol (MCTP) - routing |
4 | * implementation. |
5 | * |
6 | * This is currently based on a simple routing table, with no dst cache. The |
7 | * number of routes should stay fairly small, so the lookup cost is small. |
8 | * |
9 | * Copyright (c) 2021 Code Construct |
10 | * Copyright (c) 2021 Google |
11 | */ |
12 | |
13 | #include <linux/idr.h> |
14 | #include <linux/kconfig.h> |
15 | #include <linux/mctp.h> |
16 | #include <linux/netdevice.h> |
17 | #include <linux/rtnetlink.h> |
18 | #include <linux/skbuff.h> |
19 | |
20 | #include <uapi/linux/if_arp.h> |
21 | |
22 | #include <net/mctp.h> |
23 | #include <net/mctpdevice.h> |
24 | #include <net/netlink.h> |
25 | #include <net/sock.h> |
26 | |
27 | #include <trace/events/mctp.h> |
28 | |
29 | static const unsigned int mctp_message_maxlen = 64 * 1024; |
30 | static const unsigned long mctp_key_lifetime = 6 * CONFIG_HZ; |
31 | |
32 | static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev); |
33 | |
34 | /* route output callbacks */ |
/* Route action that drops the packet: the skb is freed and 0 is returned.
 * The route argument is not used.
 */
static int mctp_route_discard(struct mctp_route *route, struct sk_buff *skb)
{
	/* we own the skb at this point; nothing is forwarded */
	kfree_skb(skb);

	return 0;
}
40 | |
41 | static struct mctp_sock *mctp_lookup_bind(struct net *net, struct sk_buff *skb) |
42 | { |
43 | struct mctp_skb_cb *cb = mctp_cb(skb); |
44 | struct mctp_hdr *mh; |
45 | struct sock *sk; |
46 | u8 type; |
47 | |
48 | WARN_ON(!rcu_read_lock_held()); |
49 | |
50 | /* TODO: look up in skb->cb? */ |
51 | mh = mctp_hdr(skb); |
52 | |
53 | if (!skb_headlen(skb)) |
54 | return NULL; |
55 | |
56 | type = (*(u8 *)skb->data) & 0x7f; |
57 | |
58 | sk_for_each_rcu(sk, &net->mctp.binds) { |
59 | struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); |
60 | |
61 | if (msk->bind_net != MCTP_NET_ANY && msk->bind_net != cb->net) |
62 | continue; |
63 | |
64 | if (msk->bind_type != type) |
65 | continue; |
66 | |
67 | if (!mctp_address_matches(match: msk->bind_addr, eid: mh->dest)) |
68 | continue; |
69 | |
70 | return msk; |
71 | } |
72 | |
73 | return NULL; |
74 | } |
75 | |
76 | static bool mctp_key_match(struct mctp_sk_key *key, mctp_eid_t local, |
77 | mctp_eid_t peer, u8 tag) |
78 | { |
79 | if (!mctp_address_matches(match: key->local_addr, eid: local)) |
80 | return false; |
81 | |
82 | if (key->peer_addr != peer) |
83 | return false; |
84 | |
85 | if (key->tag != tag) |
86 | return false; |
87 | |
88 | return true; |
89 | } |
90 | |
91 | /* returns a key (with key->lock held, and refcounted), or NULL if no such |
92 | * key exists. |
93 | */ |
94 | static struct mctp_sk_key *mctp_lookup_key(struct net *net, struct sk_buff *skb, |
95 | mctp_eid_t peer, |
96 | unsigned long *irqflags) |
97 | __acquires(&key->lock) |
98 | { |
99 | struct mctp_sk_key *key, *ret; |
100 | unsigned long flags; |
101 | struct mctp_hdr *mh; |
102 | u8 tag; |
103 | |
104 | mh = mctp_hdr(skb); |
105 | tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); |
106 | |
107 | ret = NULL; |
108 | spin_lock_irqsave(&net->mctp.keys_lock, flags); |
109 | |
110 | hlist_for_each_entry(key, &net->mctp.keys, hlist) { |
111 | if (!mctp_key_match(key, local: mh->dest, peer, tag)) |
112 | continue; |
113 | |
114 | spin_lock(lock: &key->lock); |
115 | if (key->valid) { |
116 | refcount_inc(r: &key->refs); |
117 | ret = key; |
118 | break; |
119 | } |
120 | spin_unlock(lock: &key->lock); |
121 | } |
122 | |
123 | if (ret) { |
124 | spin_unlock(lock: &net->mctp.keys_lock); |
125 | *irqflags = flags; |
126 | } else { |
127 | spin_unlock_irqrestore(lock: &net->mctp.keys_lock, flags); |
128 | } |
129 | |
130 | return ret; |
131 | } |
132 | |
133 | static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk, |
134 | mctp_eid_t local, mctp_eid_t peer, |
135 | u8 tag, gfp_t gfp) |
136 | { |
137 | struct mctp_sk_key *key; |
138 | |
139 | key = kzalloc(size: sizeof(*key), flags: gfp); |
140 | if (!key) |
141 | return NULL; |
142 | |
143 | key->peer_addr = peer; |
144 | key->local_addr = local; |
145 | key->tag = tag; |
146 | key->sk = &msk->sk; |
147 | key->valid = true; |
148 | spin_lock_init(&key->lock); |
149 | refcount_set(r: &key->refs, n: 1); |
150 | sock_hold(sk: key->sk); |
151 | |
152 | return key; |
153 | } |
154 | |
155 | void mctp_key_unref(struct mctp_sk_key *key) |
156 | { |
157 | unsigned long flags; |
158 | |
159 | if (!refcount_dec_and_test(r: &key->refs)) |
160 | return; |
161 | |
162 | /* even though no refs exist here, the lock allows us to stay |
163 | * consistent with the locking requirement of mctp_dev_release_key |
164 | */ |
165 | spin_lock_irqsave(&key->lock, flags); |
166 | mctp_dev_release_key(dev: key->dev, key); |
167 | spin_unlock_irqrestore(lock: &key->lock, flags); |
168 | |
169 | sock_put(sk: key->sk); |
170 | kfree(objp: key); |
171 | } |
172 | |
173 | static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) |
174 | { |
175 | struct net *net = sock_net(sk: &msk->sk); |
176 | struct mctp_sk_key *tmp; |
177 | unsigned long flags; |
178 | int rc = 0; |
179 | |
180 | spin_lock_irqsave(&net->mctp.keys_lock, flags); |
181 | |
182 | if (sock_flag(sk: &msk->sk, flag: SOCK_DEAD)) { |
183 | rc = -EINVAL; |
184 | goto out_unlock; |
185 | } |
186 | |
187 | hlist_for_each_entry(tmp, &net->mctp.keys, hlist) { |
188 | if (mctp_key_match(key: tmp, local: key->local_addr, peer: key->peer_addr, |
189 | tag: key->tag)) { |
190 | spin_lock(lock: &tmp->lock); |
191 | if (tmp->valid) |
192 | rc = -EEXIST; |
193 | spin_unlock(lock: &tmp->lock); |
194 | if (rc) |
195 | break; |
196 | } |
197 | } |
198 | |
199 | if (!rc) { |
200 | refcount_inc(r: &key->refs); |
201 | key->expiry = jiffies + mctp_key_lifetime; |
202 | timer_reduce(timer: &msk->key_expiry, expires: key->expiry); |
203 | |
204 | hlist_add_head(n: &key->hlist, h: &net->mctp.keys); |
205 | hlist_add_head(n: &key->sklist, h: &msk->keys); |
206 | } |
207 | |
208 | out_unlock: |
209 | spin_unlock_irqrestore(lock: &net->mctp.keys_lock, flags); |
210 | |
211 | return rc; |
212 | } |
213 | |
214 | /* Helper for mctp_route_input(). |
215 | * We're done with the key; unlock and unref the key. |
216 | * For the usual case of automatic expiry we remove the key from lists. |
217 | * In the case that manual allocation is set on a key we release the lock |
218 | * and local ref, reset reassembly, but don't remove from lists. |
219 | */ |
220 | static void __mctp_key_done_in(struct mctp_sk_key *key, struct net *net, |
221 | unsigned long flags, unsigned long reason) |
222 | __releases(&key->lock) |
223 | { |
224 | struct sk_buff *skb; |
225 | |
226 | trace_mctp_key_release(key, reason); |
227 | skb = key->reasm_head; |
228 | key->reasm_head = NULL; |
229 | |
230 | if (!key->manual_alloc) { |
231 | key->reasm_dead = true; |
232 | key->valid = false; |
233 | mctp_dev_release_key(dev: key->dev, key); |
234 | } |
235 | spin_unlock_irqrestore(lock: &key->lock, flags); |
236 | |
237 | if (!key->manual_alloc) { |
238 | spin_lock_irqsave(&net->mctp.keys_lock, flags); |
239 | if (!hlist_unhashed(h: &key->hlist)) { |
240 | hlist_del_init(n: &key->hlist); |
241 | hlist_del_init(n: &key->sklist); |
242 | mctp_key_unref(key); |
243 | } |
244 | spin_unlock_irqrestore(lock: &net->mctp.keys_lock, flags); |
245 | } |
246 | |
247 | /* and one for the local reference */ |
248 | mctp_key_unref(key); |
249 | |
250 | kfree_skb(skb); |
251 | } |
252 | |
253 | #ifdef CONFIG_MCTP_FLOWS |
254 | static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key) |
255 | { |
256 | struct mctp_flow *flow; |
257 | |
258 | flow = skb_ext_add(skb, id: SKB_EXT_MCTP); |
259 | if (!flow) |
260 | return; |
261 | |
262 | refcount_inc(r: &key->refs); |
263 | flow->key = key; |
264 | } |
265 | |
266 | static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev) |
267 | { |
268 | struct mctp_sk_key *key; |
269 | struct mctp_flow *flow; |
270 | |
271 | flow = skb_ext_find(skb, id: SKB_EXT_MCTP); |
272 | if (!flow) |
273 | return; |
274 | |
275 | key = flow->key; |
276 | |
277 | if (WARN_ON(key->dev && key->dev != dev)) |
278 | return; |
279 | |
280 | mctp_dev_set_key(dev, key); |
281 | } |
282 | #else |
/* Flow support is compiled out: both hooks are no-ops. */
static void mctp_skb_set_flow(struct sk_buff *skb, struct mctp_sk_key *key)
{
}

static void mctp_flow_prepare_output(struct sk_buff *skb, struct mctp_dev *dev)
{
}
285 | #endif |
286 | |
287 | static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) |
288 | { |
289 | struct mctp_hdr *hdr = mctp_hdr(skb); |
290 | u8 exp_seq, this_seq; |
291 | |
292 | this_seq = (hdr->flags_seq_tag >> MCTP_HDR_SEQ_SHIFT) |
293 | & MCTP_HDR_SEQ_MASK; |
294 | |
295 | if (!key->reasm_head) { |
296 | key->reasm_head = skb; |
297 | key->reasm_tailp = &(skb_shinfo(skb)->frag_list); |
298 | key->last_seq = this_seq; |
299 | return 0; |
300 | } |
301 | |
302 | exp_seq = (key->last_seq + 1) & MCTP_HDR_SEQ_MASK; |
303 | |
304 | if (this_seq != exp_seq) |
305 | return -EINVAL; |
306 | |
307 | if (key->reasm_head->len + skb->len > mctp_message_maxlen) |
308 | return -EINVAL; |
309 | |
310 | skb->next = NULL; |
311 | skb->sk = NULL; |
312 | *key->reasm_tailp = skb; |
313 | key->reasm_tailp = &skb->next; |
314 | |
315 | key->last_seq = this_seq; |
316 | |
317 | key->reasm_head->data_len += skb->len; |
318 | key->reasm_head->len += skb->len; |
319 | key->reasm_head->truesize += skb->truesize; |
320 | |
321 | return 0; |
322 | } |
323 | |
324 | static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) |
325 | { |
326 | struct mctp_sk_key *key, *any_key = NULL; |
327 | struct net *net = dev_net(dev: skb->dev); |
328 | struct mctp_sock *msk; |
329 | struct mctp_hdr *mh; |
330 | unsigned long f; |
331 | u8 tag, flags; |
332 | int rc; |
333 | |
334 | msk = NULL; |
335 | rc = -EINVAL; |
336 | |
337 | /* we may be receiving a locally-routed packet; drop source sk |
338 | * accounting |
339 | */ |
340 | skb_orphan(skb); |
341 | |
342 | /* ensure we have enough data for a header and a type */ |
343 | if (skb->len < sizeof(struct mctp_hdr) + 1) |
344 | goto out; |
345 | |
346 | /* grab header, advance data ptr */ |
347 | mh = mctp_hdr(skb); |
348 | skb_pull(skb, len: sizeof(struct mctp_hdr)); |
349 | |
350 | if (mh->ver != 1) |
351 | goto out; |
352 | |
353 | flags = mh->flags_seq_tag & (MCTP_HDR_FLAG_SOM | MCTP_HDR_FLAG_EOM); |
354 | tag = mh->flags_seq_tag & (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); |
355 | |
356 | rcu_read_lock(); |
357 | |
358 | /* lookup socket / reasm context, exactly matching (src,dest,tag). |
359 | * we hold a ref on the key, and key->lock held. |
360 | */ |
361 | key = mctp_lookup_key(net, skb, peer: mh->src, irqflags: &f); |
362 | |
363 | if (flags & MCTP_HDR_FLAG_SOM) { |
364 | if (key) { |
365 | msk = container_of(key->sk, struct mctp_sock, sk); |
366 | } else { |
367 | /* first response to a broadcast? do a more general |
368 | * key lookup to find the socket, but don't use this |
369 | * key for reassembly - we'll create a more specific |
370 | * one for future packets if required (ie, !EOM). |
371 | */ |
372 | any_key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, irqflags: &f); |
373 | if (any_key) { |
374 | msk = container_of(any_key->sk, |
375 | struct mctp_sock, sk); |
376 | spin_unlock_irqrestore(lock: &any_key->lock, flags: f); |
377 | } |
378 | } |
379 | |
380 | if (!key && !msk && (tag & MCTP_HDR_FLAG_TO)) |
381 | msk = mctp_lookup_bind(net, skb); |
382 | |
383 | if (!msk) { |
384 | rc = -ENOENT; |
385 | goto out_unlock; |
386 | } |
387 | |
388 | /* single-packet message? deliver to socket, clean up any |
389 | * pending key. |
390 | */ |
391 | if (flags & MCTP_HDR_FLAG_EOM) { |
392 | sock_queue_rcv_skb(sk: &msk->sk, skb); |
393 | if (key) { |
394 | /* we've hit a pending reassembly; not much we |
395 | * can do but drop it |
396 | */ |
397 | __mctp_key_done_in(key, net, flags: f, |
398 | reason: MCTP_TRACE_KEY_REPLIED); |
399 | key = NULL; |
400 | } |
401 | rc = 0; |
402 | goto out_unlock; |
403 | } |
404 | |
405 | /* broadcast response or a bind() - create a key for further |
406 | * packets for this message |
407 | */ |
408 | if (!key) { |
409 | key = mctp_key_alloc(msk, local: mh->dest, peer: mh->src, |
410 | tag, GFP_ATOMIC); |
411 | if (!key) { |
412 | rc = -ENOMEM; |
413 | goto out_unlock; |
414 | } |
415 | |
416 | /* we can queue without the key lock here, as the |
417 | * key isn't observable yet |
418 | */ |
419 | mctp_frag_queue(key, skb); |
420 | |
421 | /* if the key_add fails, we've raced with another |
422 | * SOM packet with the same src, dest and tag. There's |
423 | * no way to distinguish future packets, so all we |
424 | * can do is drop; we'll free the skb on exit from |
425 | * this function. |
426 | */ |
427 | rc = mctp_key_add(key, msk); |
428 | if (!rc) |
429 | trace_mctp_key_acquire(key); |
430 | |
431 | /* we don't need to release key->lock on exit, so |
432 | * clean up here and suppress the unlock via |
433 | * setting to NULL |
434 | */ |
435 | mctp_key_unref(key); |
436 | key = NULL; |
437 | |
438 | } else { |
439 | if (key->reasm_head || key->reasm_dead) { |
440 | /* duplicate start? drop everything */ |
441 | __mctp_key_done_in(key, net, flags: f, |
442 | reason: MCTP_TRACE_KEY_INVALIDATED); |
443 | rc = -EEXIST; |
444 | key = NULL; |
445 | } else { |
446 | rc = mctp_frag_queue(key, skb); |
447 | } |
448 | } |
449 | |
450 | } else if (key) { |
451 | /* this packet continues a previous message; reassemble |
452 | * using the message-specific key |
453 | */ |
454 | |
455 | /* we need to be continuing an existing reassembly... */ |
456 | if (!key->reasm_head) |
457 | rc = -EINVAL; |
458 | else |
459 | rc = mctp_frag_queue(key, skb); |
460 | |
461 | /* end of message? deliver to socket, and we're done with |
462 | * the reassembly/response key |
463 | */ |
464 | if (!rc && flags & MCTP_HDR_FLAG_EOM) { |
465 | sock_queue_rcv_skb(sk: key->sk, skb: key->reasm_head); |
466 | key->reasm_head = NULL; |
467 | __mctp_key_done_in(key, net, flags: f, reason: MCTP_TRACE_KEY_REPLIED); |
468 | key = NULL; |
469 | } |
470 | |
471 | } else { |
472 | /* not a start, no matching key */ |
473 | rc = -ENOENT; |
474 | } |
475 | |
476 | out_unlock: |
477 | rcu_read_unlock(); |
478 | if (key) { |
479 | spin_unlock_irqrestore(lock: &key->lock, flags: f); |
480 | mctp_key_unref(key); |
481 | } |
482 | if (any_key) |
483 | mctp_key_unref(key: any_key); |
484 | out: |
485 | if (rc) |
486 | kfree_skb(skb); |
487 | return rc; |
488 | } |
489 | |
490 | static unsigned int mctp_route_mtu(struct mctp_route *rt) |
491 | { |
492 | return rt->mtu ?: READ_ONCE(rt->dev->dev->mtu); |
493 | } |
494 | |
495 | static int mctp_route_output(struct mctp_route *route, struct sk_buff *skb) |
496 | { |
497 | struct mctp_skb_cb *cb = mctp_cb(skb); |
498 | struct mctp_hdr *hdr = mctp_hdr(skb); |
499 | char daddr_buf[MAX_ADDR_LEN]; |
500 | char *daddr = NULL; |
501 | unsigned int mtu; |
502 | int rc; |
503 | |
504 | skb->protocol = htons(ETH_P_MCTP); |
505 | |
506 | mtu = READ_ONCE(skb->dev->mtu); |
507 | if (skb->len > mtu) { |
508 | kfree_skb(skb); |
509 | return -EMSGSIZE; |
510 | } |
511 | |
512 | if (cb->ifindex) { |
513 | /* direct route; use the hwaddr we stashed in sendmsg */ |
514 | if (cb->halen != skb->dev->addr_len) { |
515 | /* sanity check, sendmsg should have already caught this */ |
516 | kfree_skb(skb); |
517 | return -EMSGSIZE; |
518 | } |
519 | daddr = cb->haddr; |
520 | } else { |
521 | /* If lookup fails let the device handle daddr==NULL */ |
522 | if (mctp_neigh_lookup(dev: route->dev, eid: hdr->dest, ret_hwaddr: daddr_buf) == 0) |
523 | daddr = daddr_buf; |
524 | } |
525 | |
526 | rc = dev_hard_header(skb, dev: skb->dev, ntohs(skb->protocol), |
527 | daddr, saddr: skb->dev->dev_addr, len: skb->len); |
528 | if (rc < 0) { |
529 | kfree_skb(skb); |
530 | return -EHOSTUNREACH; |
531 | } |
532 | |
533 | mctp_flow_prepare_output(skb, dev: route->dev); |
534 | |
535 | rc = dev_queue_xmit(skb); |
536 | if (rc) |
537 | rc = net_xmit_errno(rc); |
538 | |
539 | return rc; |
540 | } |
541 | |
542 | /* route alloc/release */ |
543 | static void mctp_route_release(struct mctp_route *rt) |
544 | { |
545 | if (refcount_dec_and_test(r: &rt->refs)) { |
546 | mctp_dev_put(mdev: rt->dev); |
547 | kfree_rcu(rt, rcu); |
548 | } |
549 | } |
550 | |
551 | /* returns a route with the refcount at 1 */ |
552 | static struct mctp_route *mctp_route_alloc(void) |
553 | { |
554 | struct mctp_route *rt; |
555 | |
556 | rt = kzalloc(size: sizeof(*rt), GFP_KERNEL); |
557 | if (!rt) |
558 | return NULL; |
559 | |
560 | INIT_LIST_HEAD(list: &rt->list); |
561 | refcount_set(r: &rt->refs, n: 1); |
562 | rt->output = mctp_route_discard; |
563 | |
564 | return rt; |
565 | } |
566 | |
567 | unsigned int mctp_default_net(struct net *net) |
568 | { |
569 | return READ_ONCE(net->mctp.default_net); |
570 | } |
571 | |
572 | int mctp_default_net_set(struct net *net, unsigned int index) |
573 | { |
574 | if (index == 0) |
575 | return -EINVAL; |
576 | WRITE_ONCE(net->mctp.default_net, index); |
577 | return 0; |
578 | } |
579 | |
580 | /* tag management */ |
581 | static void mctp_reserve_tag(struct net *net, struct mctp_sk_key *key, |
582 | struct mctp_sock *msk) |
583 | { |
584 | struct netns_mctp *mns = &net->mctp; |
585 | |
586 | lockdep_assert_held(&mns->keys_lock); |
587 | |
588 | key->expiry = jiffies + mctp_key_lifetime; |
589 | timer_reduce(timer: &msk->key_expiry, expires: key->expiry); |
590 | |
591 | /* we hold the net->key_lock here, allowing updates to both |
592 | * then net and sk |
593 | */ |
594 | hlist_add_head_rcu(n: &key->hlist, h: &mns->keys); |
595 | hlist_add_head_rcu(n: &key->sklist, h: &msk->keys); |
596 | refcount_inc(r: &key->refs); |
597 | } |
598 | |
599 | /* Allocate a locally-owned tag value for (saddr, daddr), and reserve |
600 | * it for the socket msk |
601 | */ |
602 | struct mctp_sk_key *mctp_alloc_local_tag(struct mctp_sock *msk, |
603 | mctp_eid_t daddr, mctp_eid_t saddr, |
604 | bool manual, u8 *tagp) |
605 | { |
606 | struct net *net = sock_net(sk: &msk->sk); |
607 | struct netns_mctp *mns = &net->mctp; |
608 | struct mctp_sk_key *key, *tmp; |
609 | unsigned long flags; |
610 | u8 tagbits; |
611 | |
612 | /* for NULL destination EIDs, we may get a response from any peer */ |
613 | if (daddr == MCTP_ADDR_NULL) |
614 | daddr = MCTP_ADDR_ANY; |
615 | |
616 | /* be optimistic, alloc now */ |
617 | key = mctp_key_alloc(msk, local: saddr, peer: daddr, tag: 0, GFP_KERNEL); |
618 | if (!key) |
619 | return ERR_PTR(error: -ENOMEM); |
620 | |
621 | /* 8 possible tag values */ |
622 | tagbits = 0xff; |
623 | |
624 | spin_lock_irqsave(&mns->keys_lock, flags); |
625 | |
626 | /* Walk through the existing keys, looking for potential conflicting |
627 | * tags. If we find a conflict, clear that bit from tagbits |
628 | */ |
629 | hlist_for_each_entry(tmp, &mns->keys, hlist) { |
630 | /* We can check the lookup fields (*_addr, tag) without the |
631 | * lock held, they don't change over the lifetime of the key. |
632 | */ |
633 | |
634 | /* if we don't own the tag, it can't conflict */ |
635 | if (tmp->tag & MCTP_HDR_FLAG_TO) |
636 | continue; |
637 | |
638 | if (!(mctp_address_matches(match: tmp->peer_addr, eid: daddr) && |
639 | mctp_address_matches(match: tmp->local_addr, eid: saddr))) |
640 | continue; |
641 | |
642 | spin_lock(lock: &tmp->lock); |
643 | /* key must still be valid. If we find a match, clear the |
644 | * potential tag value |
645 | */ |
646 | if (tmp->valid) |
647 | tagbits &= ~(1 << tmp->tag); |
648 | spin_unlock(lock: &tmp->lock); |
649 | |
650 | if (!tagbits) |
651 | break; |
652 | } |
653 | |
654 | if (tagbits) { |
655 | key->tag = __ffs(tagbits); |
656 | mctp_reserve_tag(net, key, msk); |
657 | trace_mctp_key_acquire(key); |
658 | |
659 | key->manual_alloc = manual; |
660 | *tagp = key->tag; |
661 | } |
662 | |
663 | spin_unlock_irqrestore(lock: &mns->keys_lock, flags); |
664 | |
665 | if (!tagbits) { |
666 | kfree(objp: key); |
667 | return ERR_PTR(error: -EBUSY); |
668 | } |
669 | |
670 | return key; |
671 | } |
672 | |
673 | static struct mctp_sk_key *mctp_lookup_prealloc_tag(struct mctp_sock *msk, |
674 | mctp_eid_t daddr, |
675 | u8 req_tag, u8 *tagp) |
676 | { |
677 | struct net *net = sock_net(sk: &msk->sk); |
678 | struct netns_mctp *mns = &net->mctp; |
679 | struct mctp_sk_key *key, *tmp; |
680 | unsigned long flags; |
681 | |
682 | req_tag &= ~(MCTP_TAG_PREALLOC | MCTP_TAG_OWNER); |
683 | key = NULL; |
684 | |
685 | spin_lock_irqsave(&mns->keys_lock, flags); |
686 | |
687 | hlist_for_each_entry(tmp, &mns->keys, hlist) { |
688 | if (tmp->tag != req_tag) |
689 | continue; |
690 | |
691 | if (!mctp_address_matches(match: tmp->peer_addr, eid: daddr)) |
692 | continue; |
693 | |
694 | if (!tmp->manual_alloc) |
695 | continue; |
696 | |
697 | spin_lock(lock: &tmp->lock); |
698 | if (tmp->valid) { |
699 | key = tmp; |
700 | refcount_inc(r: &key->refs); |
701 | spin_unlock(lock: &tmp->lock); |
702 | break; |
703 | } |
704 | spin_unlock(lock: &tmp->lock); |
705 | } |
706 | spin_unlock_irqrestore(lock: &mns->keys_lock, flags); |
707 | |
708 | if (!key) |
709 | return ERR_PTR(error: -ENOENT); |
710 | |
711 | if (tagp) |
712 | *tagp = key->tag; |
713 | |
714 | return key; |
715 | } |
716 | |
717 | /* routing lookups */ |
718 | static bool mctp_rt_match_eid(struct mctp_route *rt, |
719 | unsigned int net, mctp_eid_t eid) |
720 | { |
721 | return READ_ONCE(rt->dev->net) == net && |
722 | rt->min <= eid && rt->max >= eid; |
723 | } |
724 | |
725 | /* compares match, used for duplicate prevention */ |
726 | static bool mctp_rt_compare_exact(struct mctp_route *rt1, |
727 | struct mctp_route *rt2) |
728 | { |
729 | ASSERT_RTNL(); |
730 | return rt1->dev->net == rt2->dev->net && |
731 | rt1->min == rt2->min && |
732 | rt1->max == rt2->max; |
733 | } |
734 | |
735 | struct mctp_route *mctp_route_lookup(struct net *net, unsigned int dnet, |
736 | mctp_eid_t daddr) |
737 | { |
738 | struct mctp_route *tmp, *rt = NULL; |
739 | |
740 | rcu_read_lock(); |
741 | |
742 | list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { |
743 | /* TODO: add metrics */ |
744 | if (mctp_rt_match_eid(rt: tmp, net: dnet, eid: daddr)) { |
745 | if (refcount_inc_not_zero(r: &tmp->refs)) { |
746 | rt = tmp; |
747 | break; |
748 | } |
749 | } |
750 | } |
751 | |
752 | rcu_read_unlock(); |
753 | |
754 | return rt; |
755 | } |
756 | |
757 | static struct mctp_route *mctp_route_lookup_null(struct net *net, |
758 | struct net_device *dev) |
759 | { |
760 | struct mctp_route *tmp, *rt = NULL; |
761 | |
762 | rcu_read_lock(); |
763 | |
764 | list_for_each_entry_rcu(tmp, &net->mctp.routes, list) { |
765 | if (tmp->dev->dev == dev && tmp->type == RTN_LOCAL && |
766 | refcount_inc_not_zero(r: &tmp->refs)) { |
767 | rt = tmp; |
768 | break; |
769 | } |
770 | } |
771 | |
772 | rcu_read_unlock(); |
773 | |
774 | return rt; |
775 | } |
776 | |
777 | static int mctp_do_fragment_route(struct mctp_route *rt, struct sk_buff *skb, |
778 | unsigned int mtu, u8 tag) |
779 | { |
780 | const unsigned int hlen = sizeof(struct mctp_hdr); |
781 | struct mctp_hdr *hdr, *hdr2; |
782 | unsigned int pos, size, headroom; |
783 | struct sk_buff *skb2; |
784 | int rc; |
785 | u8 seq; |
786 | |
787 | hdr = mctp_hdr(skb); |
788 | seq = 0; |
789 | rc = 0; |
790 | |
791 | if (mtu < hlen + 1) { |
792 | kfree_skb(skb); |
793 | return -EMSGSIZE; |
794 | } |
795 | |
796 | /* keep same headroom as the original skb */ |
797 | headroom = skb_headroom(skb); |
798 | |
799 | /* we've got the header */ |
800 | skb_pull(skb, len: hlen); |
801 | |
802 | for (pos = 0; pos < skb->len;) { |
803 | /* size of message payload */ |
804 | size = min(mtu - hlen, skb->len - pos); |
805 | |
806 | skb2 = alloc_skb(size: headroom + hlen + size, GFP_KERNEL); |
807 | if (!skb2) { |
808 | rc = -ENOMEM; |
809 | break; |
810 | } |
811 | |
812 | /* generic skb copy */ |
813 | skb2->protocol = skb->protocol; |
814 | skb2->priority = skb->priority; |
815 | skb2->dev = skb->dev; |
816 | memcpy(skb2->cb, skb->cb, sizeof(skb2->cb)); |
817 | |
818 | if (skb->sk) |
819 | skb_set_owner_w(skb: skb2, sk: skb->sk); |
820 | |
821 | /* establish packet */ |
822 | skb_reserve(skb: skb2, len: headroom); |
823 | skb_reset_network_header(skb: skb2); |
824 | skb_put(skb: skb2, len: hlen + size); |
825 | skb2->transport_header = skb2->network_header + hlen; |
826 | |
827 | /* copy header fields, calculate SOM/EOM flags & seq */ |
828 | hdr2 = mctp_hdr(skb: skb2); |
829 | hdr2->ver = hdr->ver; |
830 | hdr2->dest = hdr->dest; |
831 | hdr2->src = hdr->src; |
832 | hdr2->flags_seq_tag = tag & |
833 | (MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO); |
834 | |
835 | if (pos == 0) |
836 | hdr2->flags_seq_tag |= MCTP_HDR_FLAG_SOM; |
837 | |
838 | if (pos + size == skb->len) |
839 | hdr2->flags_seq_tag |= MCTP_HDR_FLAG_EOM; |
840 | |
841 | hdr2->flags_seq_tag |= seq << MCTP_HDR_SEQ_SHIFT; |
842 | |
843 | /* copy message payload */ |
844 | skb_copy_bits(skb, offset: pos, to: skb_transport_header(skb: skb2), len: size); |
845 | |
846 | /* do route */ |
847 | rc = rt->output(rt, skb2); |
848 | if (rc) |
849 | break; |
850 | |
851 | seq = (seq + 1) & MCTP_HDR_SEQ_MASK; |
852 | pos += size; |
853 | } |
854 | |
855 | consume_skb(skb); |
856 | return rc; |
857 | } |
858 | |
859 | int mctp_local_output(struct sock *sk, struct mctp_route *rt, |
860 | struct sk_buff *skb, mctp_eid_t daddr, u8 req_tag) |
861 | { |
862 | struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); |
863 | struct mctp_skb_cb *cb = mctp_cb(skb); |
864 | struct mctp_route tmp_rt = {0}; |
865 | struct mctp_sk_key *key; |
866 | struct mctp_hdr *hdr; |
867 | unsigned long flags; |
868 | unsigned int mtu; |
869 | mctp_eid_t saddr; |
870 | bool ext_rt; |
871 | int rc; |
872 | u8 tag; |
873 | |
874 | rc = -ENODEV; |
875 | |
876 | if (rt) { |
877 | ext_rt = false; |
878 | if (WARN_ON(!rt->dev)) |
879 | goto out_release; |
880 | |
881 | } else if (cb->ifindex) { |
882 | struct net_device *dev; |
883 | |
884 | ext_rt = true; |
885 | rt = &tmp_rt; |
886 | |
887 | rcu_read_lock(); |
888 | dev = dev_get_by_index_rcu(net: sock_net(sk), ifindex: cb->ifindex); |
889 | if (!dev) { |
890 | rcu_read_unlock(); |
891 | return rc; |
892 | } |
893 | rt->dev = __mctp_dev_get(dev); |
894 | rcu_read_unlock(); |
895 | |
896 | if (!rt->dev) |
897 | goto out_release; |
898 | |
899 | /* establish temporary route - we set up enough to keep |
900 | * mctp_route_output happy |
901 | */ |
902 | rt->output = mctp_route_output; |
903 | rt->mtu = 0; |
904 | |
905 | } else { |
906 | return -EINVAL; |
907 | } |
908 | |
909 | spin_lock_irqsave(&rt->dev->addrs_lock, flags); |
910 | if (rt->dev->num_addrs == 0) { |
911 | rc = -EHOSTUNREACH; |
912 | } else { |
913 | /* use the outbound interface's first address as our source */ |
914 | saddr = rt->dev->addrs[0]; |
915 | rc = 0; |
916 | } |
917 | spin_unlock_irqrestore(lock: &rt->dev->addrs_lock, flags); |
918 | |
919 | if (rc) |
920 | goto out_release; |
921 | |
922 | if (req_tag & MCTP_TAG_OWNER) { |
923 | if (req_tag & MCTP_TAG_PREALLOC) |
924 | key = mctp_lookup_prealloc_tag(msk, daddr, |
925 | req_tag, tagp: &tag); |
926 | else |
927 | key = mctp_alloc_local_tag(msk, daddr, saddr, |
928 | manual: false, tagp: &tag); |
929 | |
930 | if (IS_ERR(ptr: key)) { |
931 | rc = PTR_ERR(ptr: key); |
932 | goto out_release; |
933 | } |
934 | mctp_skb_set_flow(skb, key); |
935 | /* done with the key in this scope */ |
936 | mctp_key_unref(key); |
937 | tag |= MCTP_HDR_FLAG_TO; |
938 | } else { |
939 | key = NULL; |
940 | tag = req_tag & MCTP_TAG_MASK; |
941 | } |
942 | |
943 | skb->protocol = htons(ETH_P_MCTP); |
944 | skb->priority = 0; |
945 | skb_reset_transport_header(skb); |
946 | skb_push(skb, len: sizeof(struct mctp_hdr)); |
947 | skb_reset_network_header(skb); |
948 | skb->dev = rt->dev->dev; |
949 | |
950 | /* cb->net will have been set on initial ingress */ |
951 | cb->src = saddr; |
952 | |
953 | /* set up common header fields */ |
954 | hdr = mctp_hdr(skb); |
955 | hdr->ver = 1; |
956 | hdr->dest = daddr; |
957 | hdr->src = saddr; |
958 | |
959 | mtu = mctp_route_mtu(rt); |
960 | |
961 | if (skb->len + sizeof(struct mctp_hdr) <= mtu) { |
962 | hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | |
963 | MCTP_HDR_FLAG_EOM | tag; |
964 | rc = rt->output(rt, skb); |
965 | } else { |
966 | rc = mctp_do_fragment_route(rt, skb, mtu, tag); |
967 | } |
968 | |
969 | out_release: |
970 | if (!ext_rt) |
971 | mctp_route_release(rt); |
972 | |
973 | mctp_dev_put(mdev: tmp_rt.dev); |
974 | |
975 | return rc; |
976 | } |
977 | |
978 | /* route management */ |
979 | static int mctp_route_add(struct mctp_dev *mdev, mctp_eid_t daddr_start, |
980 | unsigned int daddr_extent, unsigned int mtu, |
981 | unsigned char type) |
982 | { |
983 | int (*rtfn)(struct mctp_route *rt, struct sk_buff *skb); |
984 | struct net *net = dev_net(dev: mdev->dev); |
985 | struct mctp_route *rt, *ert; |
986 | |
987 | if (!mctp_address_unicast(eid: daddr_start)) |
988 | return -EINVAL; |
989 | |
990 | if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) |
991 | return -EINVAL; |
992 | |
993 | switch (type) { |
994 | case RTN_LOCAL: |
995 | rtfn = mctp_route_input; |
996 | break; |
997 | case RTN_UNICAST: |
998 | rtfn = mctp_route_output; |
999 | break; |
1000 | default: |
1001 | return -EINVAL; |
1002 | } |
1003 | |
1004 | rt = mctp_route_alloc(); |
1005 | if (!rt) |
1006 | return -ENOMEM; |
1007 | |
1008 | rt->min = daddr_start; |
1009 | rt->max = daddr_start + daddr_extent; |
1010 | rt->mtu = mtu; |
1011 | rt->dev = mdev; |
1012 | mctp_dev_hold(mdev: rt->dev); |
1013 | rt->type = type; |
1014 | rt->output = rtfn; |
1015 | |
1016 | ASSERT_RTNL(); |
1017 | /* Prevent duplicate identical routes. */ |
1018 | list_for_each_entry(ert, &net->mctp.routes, list) { |
1019 | if (mctp_rt_compare_exact(rt1: rt, rt2: ert)) { |
1020 | mctp_route_release(rt); |
1021 | return -EEXIST; |
1022 | } |
1023 | } |
1024 | |
1025 | list_add_rcu(new: &rt->list, head: &net->mctp.routes); |
1026 | |
1027 | return 0; |
1028 | } |
1029 | |
1030 | static int mctp_route_remove(struct mctp_dev *mdev, mctp_eid_t daddr_start, |
1031 | unsigned int daddr_extent, unsigned char type) |
1032 | { |
1033 | struct net *net = dev_net(dev: mdev->dev); |
1034 | struct mctp_route *rt, *tmp; |
1035 | mctp_eid_t daddr_end; |
1036 | bool dropped; |
1037 | |
1038 | if (daddr_extent > 0xff || daddr_start + daddr_extent >= 255) |
1039 | return -EINVAL; |
1040 | |
1041 | daddr_end = daddr_start + daddr_extent; |
1042 | dropped = false; |
1043 | |
1044 | ASSERT_RTNL(); |
1045 | |
1046 | list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { |
1047 | if (rt->dev == mdev && |
1048 | rt->min == daddr_start && rt->max == daddr_end && |
1049 | rt->type == type) { |
1050 | list_del_rcu(entry: &rt->list); |
1051 | /* TODO: immediate RTM_DELROUTE */ |
1052 | mctp_route_release(rt); |
1053 | dropped = true; |
1054 | } |
1055 | } |
1056 | |
1057 | return dropped ? 0 : -ENOENT; |
1058 | } |
1059 | |
1060 | int mctp_route_add_local(struct mctp_dev *mdev, mctp_eid_t addr) |
1061 | { |
1062 | return mctp_route_add(mdev, daddr_start: addr, daddr_extent: 0, mtu: 0, type: RTN_LOCAL); |
1063 | } |
1064 | |
1065 | int mctp_route_remove_local(struct mctp_dev *mdev, mctp_eid_t addr) |
1066 | { |
1067 | return mctp_route_remove(mdev, daddr_start: addr, daddr_extent: 0, type: RTN_LOCAL); |
1068 | } |
1069 | |
1070 | /* removes all entries for a given device */ |
1071 | void mctp_route_remove_dev(struct mctp_dev *mdev) |
1072 | { |
1073 | struct net *net = dev_net(dev: mdev->dev); |
1074 | struct mctp_route *rt, *tmp; |
1075 | |
1076 | ASSERT_RTNL(); |
1077 | list_for_each_entry_safe(rt, tmp, &net->mctp.routes, list) { |
1078 | if (rt->dev == mdev) { |
1079 | list_del_rcu(entry: &rt->list); |
1080 | /* TODO: immediate RTM_DELROUTE */ |
1081 | mctp_route_release(rt); |
1082 | } |
1083 | } |
1084 | } |
1085 | |
1086 | /* Incoming packet-handling */ |
1087 | |
1088 | static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, |
1089 | struct packet_type *pt, |
1090 | struct net_device *orig_dev) |
1091 | { |
1092 | struct net *net = dev_net(dev); |
1093 | struct mctp_dev *mdev; |
1094 | struct mctp_skb_cb *cb; |
1095 | struct mctp_route *rt; |
1096 | struct mctp_hdr *mh; |
1097 | |
1098 | rcu_read_lock(); |
1099 | mdev = __mctp_dev_get(dev); |
1100 | rcu_read_unlock(); |
1101 | if (!mdev) { |
1102 | /* basic non-data sanity checks */ |
1103 | goto err_drop; |
1104 | } |
1105 | |
1106 | if (!pskb_may_pull(skb, len: sizeof(struct mctp_hdr))) |
1107 | goto err_drop; |
1108 | |
1109 | skb_reset_transport_header(skb); |
1110 | skb_reset_network_header(skb); |
1111 | |
1112 | /* We have enough for a header; decode and route */ |
1113 | mh = mctp_hdr(skb); |
1114 | if (mh->ver < MCTP_VER_MIN || mh->ver > MCTP_VER_MAX) |
1115 | goto err_drop; |
1116 | |
1117 | /* source must be valid unicast or null; drop reserved ranges and |
1118 | * broadcast |
1119 | */ |
1120 | if (!(mctp_address_unicast(eid: mh->src) || mctp_address_null(eid: mh->src))) |
1121 | goto err_drop; |
1122 | |
1123 | /* dest address: as above, but allow broadcast */ |
1124 | if (!(mctp_address_unicast(eid: mh->dest) || mctp_address_null(eid: mh->dest) || |
1125 | mctp_address_broadcast(eid: mh->dest))) |
1126 | goto err_drop; |
1127 | |
1128 | /* MCTP drivers must populate halen/haddr */ |
1129 | if (dev->type == ARPHRD_MCTP) { |
1130 | cb = mctp_cb(skb); |
1131 | } else { |
1132 | cb = __mctp_cb(skb); |
1133 | cb->halen = 0; |
1134 | } |
1135 | cb->net = READ_ONCE(mdev->net); |
1136 | cb->ifindex = dev->ifindex; |
1137 | |
1138 | rt = mctp_route_lookup(net, dnet: cb->net, daddr: mh->dest); |
1139 | |
1140 | /* NULL EID, but addressed to our physical address */ |
1141 | if (!rt && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST) |
1142 | rt = mctp_route_lookup_null(net, dev); |
1143 | |
1144 | if (!rt) |
1145 | goto err_drop; |
1146 | |
1147 | rt->output(rt, skb); |
1148 | mctp_route_release(rt); |
1149 | mctp_dev_put(mdev); |
1150 | |
1151 | return NET_RX_SUCCESS; |
1152 | |
1153 | err_drop: |
1154 | kfree_skb(skb); |
1155 | mctp_dev_put(mdev); |
1156 | return NET_RX_DROP; |
1157 | } |
1158 | |
1159 | static struct packet_type mctp_packet_type = { |
1160 | .type = cpu_to_be16(ETH_P_MCTP), |
1161 | .func = mctp_pkttype_receive, |
1162 | }; |
1163 | |
1164 | /* netlink interface */ |
1165 | |
1166 | static const struct nla_policy rta_mctp_policy[RTA_MAX + 1] = { |
1167 | [RTA_DST] = { .type = NLA_U8 }, |
1168 | [RTA_METRICS] = { .type = NLA_NESTED }, |
1169 | [RTA_OIF] = { .type = NLA_U32 }, |
1170 | }; |
1171 | |
1172 | /* Common part for RTM_NEWROUTE and RTM_DELROUTE parsing. |
1173 | * tb must hold RTA_MAX+1 elements. |
1174 | */ |
1175 | static int mctp_route_nlparse(struct sk_buff *skb, struct nlmsghdr *nlh, |
1176 | struct netlink_ext_ack *extack, |
1177 | struct nlattr **tb, struct rtmsg **rtm, |
1178 | struct mctp_dev **mdev, mctp_eid_t *daddr_start) |
1179 | { |
1180 | struct net *net = sock_net(sk: skb->sk); |
1181 | struct net_device *dev; |
1182 | unsigned int ifindex; |
1183 | int rc; |
1184 | |
1185 | rc = nlmsg_parse(nlh, hdrlen: sizeof(struct rtmsg), tb, RTA_MAX, |
1186 | policy: rta_mctp_policy, extack); |
1187 | if (rc < 0) { |
1188 | NL_SET_ERR_MSG(extack, "incorrect format" ); |
1189 | return rc; |
1190 | } |
1191 | |
1192 | if (!tb[RTA_DST]) { |
1193 | NL_SET_ERR_MSG(extack, "dst EID missing" ); |
1194 | return -EINVAL; |
1195 | } |
1196 | *daddr_start = nla_get_u8(nla: tb[RTA_DST]); |
1197 | |
1198 | if (!tb[RTA_OIF]) { |
1199 | NL_SET_ERR_MSG(extack, "ifindex missing" ); |
1200 | return -EINVAL; |
1201 | } |
1202 | ifindex = nla_get_u32(nla: tb[RTA_OIF]); |
1203 | |
1204 | *rtm = nlmsg_data(nlh); |
1205 | if ((*rtm)->rtm_family != AF_MCTP) { |
1206 | NL_SET_ERR_MSG(extack, "route family must be AF_MCTP" ); |
1207 | return -EINVAL; |
1208 | } |
1209 | |
1210 | dev = __dev_get_by_index(net, ifindex); |
1211 | if (!dev) { |
1212 | NL_SET_ERR_MSG(extack, "bad ifindex" ); |
1213 | return -ENODEV; |
1214 | } |
1215 | *mdev = mctp_dev_get_rtnl(dev); |
1216 | if (!*mdev) |
1217 | return -ENODEV; |
1218 | |
1219 | if (dev->flags & IFF_LOOPBACK) { |
1220 | NL_SET_ERR_MSG(extack, "no routes to loopback" ); |
1221 | return -EINVAL; |
1222 | } |
1223 | |
1224 | return 0; |
1225 | } |
1226 | |
1227 | static const struct nla_policy rta_metrics_policy[RTAX_MAX + 1] = { |
1228 | [RTAX_MTU] = { .type = NLA_U32 }, |
1229 | }; |
1230 | |
1231 | static int mctp_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, |
1232 | struct netlink_ext_ack *extack) |
1233 | { |
1234 | struct nlattr *tb[RTA_MAX + 1]; |
1235 | struct nlattr *tbx[RTAX_MAX + 1]; |
1236 | mctp_eid_t daddr_start; |
1237 | struct mctp_dev *mdev; |
1238 | struct rtmsg *rtm; |
1239 | unsigned int mtu; |
1240 | int rc; |
1241 | |
1242 | rc = mctp_route_nlparse(skb, nlh, extack, tb, |
1243 | rtm: &rtm, mdev: &mdev, daddr_start: &daddr_start); |
1244 | if (rc < 0) |
1245 | return rc; |
1246 | |
1247 | if (rtm->rtm_type != RTN_UNICAST) { |
1248 | NL_SET_ERR_MSG(extack, "rtm_type must be RTN_UNICAST" ); |
1249 | return -EINVAL; |
1250 | } |
1251 | |
1252 | mtu = 0; |
1253 | if (tb[RTA_METRICS]) { |
1254 | rc = nla_parse_nested(tb: tbx, RTAX_MAX, nla: tb[RTA_METRICS], |
1255 | policy: rta_metrics_policy, NULL); |
1256 | if (rc < 0) |
1257 | return rc; |
1258 | if (tbx[RTAX_MTU]) |
1259 | mtu = nla_get_u32(nla: tbx[RTAX_MTU]); |
1260 | } |
1261 | |
1262 | rc = mctp_route_add(mdev, daddr_start, daddr_extent: rtm->rtm_dst_len, mtu, |
1263 | type: rtm->rtm_type); |
1264 | return rc; |
1265 | } |
1266 | |
1267 | static int mctp_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, |
1268 | struct netlink_ext_ack *extack) |
1269 | { |
1270 | struct nlattr *tb[RTA_MAX + 1]; |
1271 | mctp_eid_t daddr_start; |
1272 | struct mctp_dev *mdev; |
1273 | struct rtmsg *rtm; |
1274 | int rc; |
1275 | |
1276 | rc = mctp_route_nlparse(skb, nlh, extack, tb, |
1277 | rtm: &rtm, mdev: &mdev, daddr_start: &daddr_start); |
1278 | if (rc < 0) |
1279 | return rc; |
1280 | |
1281 | /* we only have unicast routes */ |
1282 | if (rtm->rtm_type != RTN_UNICAST) |
1283 | return -EINVAL; |
1284 | |
1285 | rc = mctp_route_remove(mdev, daddr_start, daddr_extent: rtm->rtm_dst_len, type: RTN_UNICAST); |
1286 | return rc; |
1287 | } |
1288 | |
1289 | static int mctp_fill_rtinfo(struct sk_buff *skb, struct mctp_route *rt, |
1290 | u32 portid, u32 seq, int event, unsigned int flags) |
1291 | { |
1292 | struct nlmsghdr *nlh; |
1293 | struct rtmsg *hdr; |
1294 | void *metrics; |
1295 | |
1296 | nlh = nlmsg_put(skb, portid, seq, type: event, payload: sizeof(*hdr), flags); |
1297 | if (!nlh) |
1298 | return -EMSGSIZE; |
1299 | |
1300 | hdr = nlmsg_data(nlh); |
1301 | hdr->rtm_family = AF_MCTP; |
1302 | |
1303 | /* we use the _len fields as a number of EIDs, rather than |
1304 | * a number of bits in the address |
1305 | */ |
1306 | hdr->rtm_dst_len = rt->max - rt->min; |
1307 | hdr->rtm_src_len = 0; |
1308 | hdr->rtm_tos = 0; |
1309 | hdr->rtm_table = RT_TABLE_DEFAULT; |
1310 | hdr->rtm_protocol = RTPROT_STATIC; /* everything is user-defined */ |
1311 | hdr->rtm_scope = RT_SCOPE_LINK; /* TODO: scope in mctp_route? */ |
1312 | hdr->rtm_type = rt->type; |
1313 | |
1314 | if (nla_put_u8(skb, attrtype: RTA_DST, value: rt->min)) |
1315 | goto cancel; |
1316 | |
1317 | metrics = nla_nest_start_noflag(skb, attrtype: RTA_METRICS); |
1318 | if (!metrics) |
1319 | goto cancel; |
1320 | |
1321 | if (rt->mtu) { |
1322 | if (nla_put_u32(skb, RTAX_MTU, value: rt->mtu)) |
1323 | goto cancel; |
1324 | } |
1325 | |
1326 | nla_nest_end(skb, start: metrics); |
1327 | |
1328 | if (rt->dev) { |
1329 | if (nla_put_u32(skb, attrtype: RTA_OIF, value: rt->dev->dev->ifindex)) |
1330 | goto cancel; |
1331 | } |
1332 | |
1333 | /* TODO: conditional neighbour physaddr? */ |
1334 | |
1335 | nlmsg_end(skb, nlh); |
1336 | |
1337 | return 0; |
1338 | |
1339 | cancel: |
1340 | nlmsg_cancel(skb, nlh); |
1341 | return -EMSGSIZE; |
1342 | } |
1343 | |
1344 | static int mctp_dump_rtinfo(struct sk_buff *skb, struct netlink_callback *cb) |
1345 | { |
1346 | struct net *net = sock_net(sk: skb->sk); |
1347 | struct mctp_route *rt; |
1348 | int s_idx, idx; |
1349 | |
1350 | /* TODO: allow filtering on route data, possibly under |
1351 | * cb->strict_check |
1352 | */ |
1353 | |
1354 | /* TODO: change to struct overlay */ |
1355 | s_idx = cb->args[0]; |
1356 | idx = 0; |
1357 | |
1358 | rcu_read_lock(); |
1359 | list_for_each_entry_rcu(rt, &net->mctp.routes, list) { |
1360 | if (idx++ < s_idx) |
1361 | continue; |
1362 | if (mctp_fill_rtinfo(skb, rt, |
1363 | NETLINK_CB(cb->skb).portid, |
1364 | seq: cb->nlh->nlmsg_seq, |
1365 | RTM_NEWROUTE, NLM_F_MULTI) < 0) |
1366 | break; |
1367 | } |
1368 | |
1369 | rcu_read_unlock(); |
1370 | cb->args[0] = idx; |
1371 | |
1372 | return skb->len; |
1373 | } |
1374 | |
1375 | /* net namespace implementation */ |
1376 | static int __net_init mctp_routes_net_init(struct net *net) |
1377 | { |
1378 | struct netns_mctp *ns = &net->mctp; |
1379 | |
1380 | INIT_LIST_HEAD(list: &ns->routes); |
1381 | INIT_HLIST_HEAD(&ns->binds); |
1382 | mutex_init(&ns->bind_lock); |
1383 | INIT_HLIST_HEAD(&ns->keys); |
1384 | spin_lock_init(&ns->keys_lock); |
1385 | WARN_ON(mctp_default_net_set(net, MCTP_INITIAL_DEFAULT_NET)); |
1386 | return 0; |
1387 | } |
1388 | |
1389 | static void __net_exit mctp_routes_net_exit(struct net *net) |
1390 | { |
1391 | struct mctp_route *rt; |
1392 | |
1393 | rcu_read_lock(); |
1394 | list_for_each_entry_rcu(rt, &net->mctp.routes, list) |
1395 | mctp_route_release(rt); |
1396 | rcu_read_unlock(); |
1397 | } |
1398 | |
1399 | static struct pernet_operations mctp_net_ops = { |
1400 | .init = mctp_routes_net_init, |
1401 | .exit = mctp_routes_net_exit, |
1402 | }; |
1403 | |
1404 | int __init mctp_routes_init(void) |
1405 | { |
1406 | dev_add_pack(pt: &mctp_packet_type); |
1407 | |
1408 | rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_GETROUTE, |
1409 | NULL, mctp_dump_rtinfo, flags: 0); |
1410 | rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_NEWROUTE, |
1411 | mctp_newroute, NULL, flags: 0); |
1412 | rtnl_register_module(THIS_MODULE, PF_MCTP, RTM_DELROUTE, |
1413 | mctp_delroute, NULL, flags: 0); |
1414 | |
1415 | return register_pernet_subsys(&mctp_net_ops); |
1416 | } |
1417 | |
1418 | void mctp_routes_exit(void) |
1419 | { |
1420 | unregister_pernet_subsys(&mctp_net_ops); |
1421 | rtnl_unregister(PF_MCTP, RTM_DELROUTE); |
1422 | rtnl_unregister(PF_MCTP, RTM_NEWROUTE); |
1423 | rtnl_unregister(PF_MCTP, RTM_GETROUTE); |
1424 | dev_remove_pack(pt: &mctp_packet_type); |
1425 | } |
1426 | |
1427 | #if IS_ENABLED(CONFIG_MCTP_TEST) |
1428 | #include "test/route-test.c" |
1429 | #endif |
1430 | |