1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | #ifndef _INET_ECN_H_ |
3 | #define _INET_ECN_H_ |
4 | |
5 | #include <linux/ip.h> |
6 | #include <linux/skbuff.h> |
7 | #include <linux/if_vlan.h> |
8 | |
9 | #include <net/inet_sock.h> |
10 | #include <net/dsfield.h> |
11 | #include <net/checksum.h> |
12 | |
/*
 * ECN codepoints: the values of the two bits selected by INET_ECN_MASK
 * in a DS field octet (IPv4 TOS / IPv6 traffic class).
 */
enum {
	INET_ECN_NOT_ECT	= 0x0,	/* not ECN-capable */
	INET_ECN_ECT_1		= 0x1,	/* ECT(1) */
	INET_ECN_ECT_0		= 0x2,	/* ECT(0) */
	INET_ECN_CE		= 0x3,	/* congestion experienced */
	INET_ECN_MASK		= 0x3,	/* selects the two ECN bits */
};

/*
 * Declared for users of this header; nothing in this file reads it.
 * NOTE(review): presumably a sysctl controlling logging of tunnel ECN
 * anomalies - confirm at its definition.
 */
extern int sysctl_tunnel_ecn_log;
22 | |
23 | static inline int INET_ECN_is_ce(__u8 dsfield) |
24 | { |
25 | return (dsfield & INET_ECN_MASK) == INET_ECN_CE; |
26 | } |
27 | |
28 | static inline int INET_ECN_is_not_ect(__u8 dsfield) |
29 | { |
30 | return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT; |
31 | } |
32 | |
33 | static inline int INET_ECN_is_capable(__u8 dsfield) |
34 | { |
35 | return dsfield & INET_ECN_ECT_0; |
36 | } |
37 | |
38 | /* |
39 | * RFC 3168 9.1.1 |
40 | * The full-functionality option for ECN encapsulation is to copy the |
41 | * ECN codepoint of the inside header to the outside header on |
42 | * encapsulation if the inside header is not-ECT or ECT, and to set the |
43 | * ECN codepoint of the outside header to ECT(0) if the ECN codepoint of |
44 | * the inside header is CE. |
45 | */ |
46 | static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) |
47 | { |
48 | outer &= ~INET_ECN_MASK; |
49 | outer |= !INET_ECN_is_ce(dsfield: inner) ? (inner & INET_ECN_MASK) : |
50 | INET_ECN_ECT_0; |
51 | return outer; |
52 | } |
53 | |
54 | static inline void INET_ECN_xmit(struct sock *sk) |
55 | { |
56 | inet_sk(sk)->tos |= INET_ECN_ECT_0; |
57 | if (inet6_sk(sk: sk) != NULL) |
58 | inet6_sk(sk: sk)->tclass |= INET_ECN_ECT_0; |
59 | } |
60 | |
61 | static inline void INET_ECN_dontxmit(struct sock *sk) |
62 | { |
63 | inet_sk(sk)->tos &= ~INET_ECN_MASK; |
64 | if (inet6_sk(sk: sk) != NULL) |
65 | inet6_sk(sk: sk)->tclass &= ~INET_ECN_MASK; |
66 | } |
67 | |
/*
 * Clear the ECN bits in @label: INET_ECN_MASK shifted left by 20 and
 * converted with htonl() is masked out.  @label must be a modifiable
 * lvalue; it is evaluated once.
 */
#define IP6_ECN_flow_init(label)					\
do {									\
	(label) &= ~htonl(INET_ECN_MASK << 20);				\
} while (0)
71 | |
/*
 * If INET_ECN_is_capable() accepts the traffic class stored in
 * inet6_sk(sk), OR the INET_ECN_ECT_0 bit (shifted left by 20 and
 * converted with htonl()) into @label; otherwise leave @label alone.
 * inet6_sk(sk) is dereferenced without a NULL check.
 */
#define IP6_ECN_flow_xmit(sk, label)					\
do {									\
	if (INET_ECN_is_capable(inet6_sk(sk)->tclass))			\
		(label) |= htonl(INET_ECN_ECT_0 << 20);			\
} while (0)
76 | |
77 | static inline int IP_ECN_set_ce(struct iphdr *iph) |
78 | { |
79 | u32 ecn = (iph->tos + 1) & INET_ECN_MASK; |
80 | __be16 check_add; |
81 | |
82 | /* |
83 | * After the last operation we have (in binary): |
84 | * INET_ECN_NOT_ECT => 01 |
85 | * INET_ECN_ECT_1 => 10 |
86 | * INET_ECN_ECT_0 => 11 |
87 | * INET_ECN_CE => 00 |
88 | */ |
89 | if (!(ecn & 2)) |
90 | return !ecn; |
91 | |
92 | /* |
93 | * The following gives us: |
94 | * INET_ECN_ECT_1 => check += htons(0xFFFD) |
95 | * INET_ECN_ECT_0 => check += htons(0xFFFE) |
96 | */ |
97 | check_add = (__force __be16)((__force u16)htons(0xFFFB) + |
98 | (__force u16)htons(ecn)); |
99 | |
100 | iph->check = csum16_add(csum: iph->check, addend: check_add); |
101 | iph->tos |= INET_ECN_CE; |
102 | return 1; |
103 | } |
104 | |
105 | static inline int IP_ECN_set_ect1(struct iphdr *iph) |
106 | { |
107 | if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) |
108 | return 0; |
109 | |
110 | iph->check = csum16_add(csum: iph->check, htons(0x1)); |
111 | iph->tos ^= INET_ECN_MASK; |
112 | return 1; |
113 | } |
114 | |
115 | static inline void IP_ECN_clear(struct iphdr *iph) |
116 | { |
117 | iph->tos &= ~INET_ECN_MASK; |
118 | } |
119 | |
120 | static inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner) |
121 | { |
122 | dscp &= ~INET_ECN_MASK; |
123 | ipv4_change_dsfield(iph: inner, mask: INET_ECN_MASK, value: dscp); |
124 | } |
125 | |
126 | struct ipv6hdr; |
127 | |
128 | /* Note: |
129 | * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE, |
130 | * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE |
131 | * In IPv6 case, no checksum compensates the change in IPv6 header, |
132 | * so we have to update skb->csum. |
133 | */ |
134 | static inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) |
135 | { |
136 | __be32 from, to; |
137 | |
138 | if (INET_ECN_is_not_ect(dsfield: ipv6_get_dsfield(ipv6h: iph))) |
139 | return 0; |
140 | |
141 | from = *(__be32 *)iph; |
142 | to = from | htonl(INET_ECN_CE << 20); |
143 | *(__be32 *)iph = to; |
144 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
145 | skb->csum = csum_add(csum: csum_sub(csum: skb->csum, addend: (__force __wsum)from), |
146 | addend: (__force __wsum)to); |
147 | return 1; |
148 | } |
149 | |
150 | static inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph) |
151 | { |
152 | __be32 from, to; |
153 | |
154 | if ((ipv6_get_dsfield(ipv6h: iph) & INET_ECN_MASK) != INET_ECN_ECT_0) |
155 | return 0; |
156 | |
157 | from = *(__be32 *)iph; |
158 | to = from ^ htonl(INET_ECN_MASK << 20); |
159 | *(__be32 *)iph = to; |
160 | if (skb->ip_summed == CHECKSUM_COMPLETE) |
161 | skb->csum = csum_add(csum: csum_sub(csum: skb->csum, addend: (__force __wsum)from), |
162 | addend: (__force __wsum)to); |
163 | return 1; |
164 | } |
165 | |
166 | static inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) |
167 | { |
168 | dscp &= ~INET_ECN_MASK; |
169 | ipv6_change_dsfield(ipv6h: inner, mask: INET_ECN_MASK, value: dscp); |
170 | } |
171 | |
172 | static inline int INET_ECN_set_ce(struct sk_buff *skb) |
173 | { |
174 | switch (skb_protocol(skb, skip_vlan: true)) { |
175 | case cpu_to_be16(ETH_P_IP): |
176 | if (skb_network_header(skb) + sizeof(struct iphdr) <= |
177 | skb_tail_pointer(skb)) |
178 | return IP_ECN_set_ce(iph: ip_hdr(skb)); |
179 | break; |
180 | |
181 | case cpu_to_be16(ETH_P_IPV6): |
182 | if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= |
183 | skb_tail_pointer(skb)) |
184 | return IP6_ECN_set_ce(skb, iph: ipv6_hdr(skb)); |
185 | break; |
186 | } |
187 | |
188 | return 0; |
189 | } |
190 | |
191 | static inline int skb_get_dsfield(struct sk_buff *skb) |
192 | { |
193 | switch (skb_protocol(skb, skip_vlan: true)) { |
194 | case cpu_to_be16(ETH_P_IP): |
195 | if (!pskb_network_may_pull(skb, len: sizeof(struct iphdr))) |
196 | break; |
197 | return ipv4_get_dsfield(iph: ip_hdr(skb)); |
198 | |
199 | case cpu_to_be16(ETH_P_IPV6): |
200 | if (!pskb_network_may_pull(skb, len: sizeof(struct ipv6hdr))) |
201 | break; |
202 | return ipv6_get_dsfield(ipv6h: ipv6_hdr(skb)); |
203 | } |
204 | |
205 | return -1; |
206 | } |
207 | |
208 | static inline int INET_ECN_set_ect1(struct sk_buff *skb) |
209 | { |
210 | switch (skb_protocol(skb, skip_vlan: true)) { |
211 | case cpu_to_be16(ETH_P_IP): |
212 | if (skb_network_header(skb) + sizeof(struct iphdr) <= |
213 | skb_tail_pointer(skb)) |
214 | return IP_ECN_set_ect1(iph: ip_hdr(skb)); |
215 | break; |
216 | |
217 | case cpu_to_be16(ETH_P_IPV6): |
218 | if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= |
219 | skb_tail_pointer(skb)) |
220 | return IP6_ECN_set_ect1(skb, iph: ipv6_hdr(skb)); |
221 | break; |
222 | } |
223 | |
224 | return 0; |
225 | } |
226 | |
227 | /* |
228 | * RFC 6040 4.2 |
229 | * To decapsulate the inner header at the tunnel egress, a compliant |
230 | * tunnel egress MUST set the outgoing ECN field to the codepoint at the |
231 | * intersection of the appropriate arriving inner header (row) and outer |
232 | * header (column) in Figure 4 |
233 | * |
234 | * +---------+------------------------------------------------+ |
235 | * |Arriving | Arriving Outer Header | |
236 | * | Inner +---------+------------+------------+------------+ |
237 | * | Header | Not-ECT | ECT(0) | ECT(1) | CE | |
238 | * +---------+---------+------------+------------+------------+ |
239 | * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)| |
240 | * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE | |
241 | * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE | |
242 | * | CE | CE | CE | CE(!!!)| CE | |
243 | * +---------+---------+------------+------------+------------+ |
244 | * |
245 | * Figure 4: New IP in IP Decapsulation Behaviour |
246 | * |
247 | * returns 0 on success |
248 | * 1 if something is broken and should be logged (!!! above) |
249 | * 2 if packet should be dropped |
250 | */ |
251 | static inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) |
252 | { |
253 | if (INET_ECN_is_not_ect(dsfield: inner)) { |
254 | switch (outer & INET_ECN_MASK) { |
255 | case INET_ECN_NOT_ECT: |
256 | return 0; |
257 | case INET_ECN_ECT_0: |
258 | case INET_ECN_ECT_1: |
259 | return 1; |
260 | case INET_ECN_CE: |
261 | return 2; |
262 | } |
263 | } |
264 | |
265 | *set_ce = INET_ECN_is_ce(dsfield: outer); |
266 | return 0; |
267 | } |
268 | |
269 | static inline int INET_ECN_decapsulate(struct sk_buff *skb, |
270 | __u8 outer, __u8 inner) |
271 | { |
272 | bool set_ce = false; |
273 | int rc; |
274 | |
275 | rc = __INET_ECN_decapsulate(outer, inner, set_ce: &set_ce); |
276 | if (!rc) { |
277 | if (set_ce) |
278 | INET_ECN_set_ce(skb); |
279 | else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1) |
280 | INET_ECN_set_ect1(skb); |
281 | } |
282 | |
283 | return rc; |
284 | } |
285 | |
286 | static inline int IP_ECN_decapsulate(const struct iphdr *oiph, |
287 | struct sk_buff *skb) |
288 | { |
289 | __u8 inner; |
290 | |
291 | switch (skb_protocol(skb, skip_vlan: true)) { |
292 | case htons(ETH_P_IP): |
293 | inner = ip_hdr(skb)->tos; |
294 | break; |
295 | case htons(ETH_P_IPV6): |
296 | inner = ipv6_get_dsfield(ipv6h: ipv6_hdr(skb)); |
297 | break; |
298 | default: |
299 | return 0; |
300 | } |
301 | |
302 | return INET_ECN_decapsulate(skb, outer: oiph->tos, inner); |
303 | } |
304 | |
305 | static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, |
306 | struct sk_buff *skb) |
307 | { |
308 | __u8 inner; |
309 | |
310 | switch (skb_protocol(skb, skip_vlan: true)) { |
311 | case htons(ETH_P_IP): |
312 | inner = ip_hdr(skb)->tos; |
313 | break; |
314 | case htons(ETH_P_IPV6): |
315 | inner = ipv6_get_dsfield(ipv6h: ipv6_hdr(skb)); |
316 | break; |
317 | default: |
318 | return 0; |
319 | } |
320 | |
321 | return INET_ECN_decapsulate(skb, outer: ipv6_get_dsfield(ipv6h: oipv6h), inner); |
322 | } |
323 | #endif |
324 | |