1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * ip_vs_proto.c: transport protocol load balancing support for IPVS |
4 | * |
5 | * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> |
6 | * Julian Anastasov <ja@ssi.bg> |
7 | * |
8 | * Changes: |
9 | */ |
10 | |
11 | #define KMSG_COMPONENT "IPVS" |
12 | #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt |
13 | |
14 | #include <linux/module.h> |
15 | #include <linux/kernel.h> |
16 | #include <linux/skbuff.h> |
17 | #include <linux/gfp.h> |
18 | #include <linux/in.h> |
19 | #include <linux/ip.h> |
20 | #include <net/protocol.h> |
21 | #include <net/tcp.h> |
22 | #include <net/udp.h> |
23 | #include <linux/stat.h> |
24 | #include <linux/proc_fs.h> |
25 | |
26 | #include <net/ip_vs.h> |
27 | |
28 | |
29 | /* |
30 | * IPVS protocols can only be registered/unregistered when the ipvs |
31 | * module is loaded/unloaded, so no lock is needed in accessing the |
32 | * ipvs protocol table. |
33 | */ |
34 | |
/* Number of hash buckets in the protocol tables; must be power of 2 */
#define IP_VS_PROTO_TAB_SIZE 32
/* Map a protocol number onto a bucket index by masking its low bits */
#define IP_VS_PROTO_HASH(proto) ((proto) & (IP_VS_PROTO_TAB_SIZE-1))

/* Global table of registered protocols; each bucket is a ->next chain */
static struct ip_vs_protocol *ip_vs_proto_table[IP_VS_PROTO_TAB_SIZE];
39 | |
40 | /* States for conn templates: NONE or words separated with ",", max 15 chars */ |
41 | static const char *ip_vs_ctpl_state_name_table[IP_VS_CTPL_S_LAST] = { |
42 | [IP_VS_CTPL_S_NONE] = "NONE" , |
43 | [IP_VS_CTPL_S_ASSURED] = "ASSURED" , |
44 | }; |
45 | |
46 | /* |
47 | * register an ipvs protocol |
48 | */ |
49 | static int __used __init register_ip_vs_protocol(struct ip_vs_protocol *pp) |
50 | { |
51 | unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); |
52 | |
53 | pp->next = ip_vs_proto_table[hash]; |
54 | ip_vs_proto_table[hash] = pp; |
55 | |
56 | if (pp->init != NULL) |
57 | pp->init(pp); |
58 | |
59 | return 0; |
60 | } |
61 | |
62 | /* |
63 | * register an ipvs protocols netns related data |
64 | */ |
65 | static int |
66 | register_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_protocol *pp) |
67 | { |
68 | unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); |
69 | struct ip_vs_proto_data *pd = |
70 | kzalloc(size: sizeof(struct ip_vs_proto_data), GFP_KERNEL); |
71 | |
72 | if (!pd) |
73 | return -ENOMEM; |
74 | |
75 | pd->pp = pp; /* For speed issues */ |
76 | pd->next = ipvs->proto_data_table[hash]; |
77 | ipvs->proto_data_table[hash] = pd; |
78 | atomic_set(v: &pd->appcnt, i: 0); /* Init app counter */ |
79 | |
80 | if (pp->init_netns != NULL) { |
81 | int ret = pp->init_netns(ipvs, pd); |
82 | if (ret) { |
83 | /* unlink an free proto data */ |
84 | ipvs->proto_data_table[hash] = pd->next; |
85 | kfree(objp: pd); |
86 | return ret; |
87 | } |
88 | } |
89 | |
90 | return 0; |
91 | } |
92 | |
93 | /* |
94 | * unregister an ipvs protocol |
95 | */ |
96 | static int unregister_ip_vs_protocol(struct ip_vs_protocol *pp) |
97 | { |
98 | struct ip_vs_protocol **pp_p; |
99 | unsigned int hash = IP_VS_PROTO_HASH(pp->protocol); |
100 | |
101 | pp_p = &ip_vs_proto_table[hash]; |
102 | for (; *pp_p; pp_p = &(*pp_p)->next) { |
103 | if (*pp_p == pp) { |
104 | *pp_p = pp->next; |
105 | if (pp->exit != NULL) |
106 | pp->exit(pp); |
107 | return 0; |
108 | } |
109 | } |
110 | |
111 | return -ESRCH; |
112 | } |
113 | |
114 | /* |
115 | * unregister an ipvs protocols netns data |
116 | */ |
117 | static int |
118 | unregister_ip_vs_proto_netns(struct netns_ipvs *ipvs, struct ip_vs_proto_data *pd) |
119 | { |
120 | struct ip_vs_proto_data **pd_p; |
121 | unsigned int hash = IP_VS_PROTO_HASH(pd->pp->protocol); |
122 | |
123 | pd_p = &ipvs->proto_data_table[hash]; |
124 | for (; *pd_p; pd_p = &(*pd_p)->next) { |
125 | if (*pd_p == pd) { |
126 | *pd_p = pd->next; |
127 | if (pd->pp->exit_netns != NULL) |
128 | pd->pp->exit_netns(ipvs, pd); |
129 | kfree(objp: pd); |
130 | return 0; |
131 | } |
132 | } |
133 | |
134 | return -ESRCH; |
135 | } |
136 | |
137 | /* |
138 | * get ip_vs_protocol object by its proto. |
139 | */ |
140 | struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto) |
141 | { |
142 | struct ip_vs_protocol *pp; |
143 | unsigned int hash = IP_VS_PROTO_HASH(proto); |
144 | |
145 | for (pp = ip_vs_proto_table[hash]; pp; pp = pp->next) { |
146 | if (pp->protocol == proto) |
147 | return pp; |
148 | } |
149 | |
150 | return NULL; |
151 | } |
152 | EXPORT_SYMBOL(ip_vs_proto_get); |
153 | |
154 | /* |
155 | * get ip_vs_protocol object data by netns and proto |
156 | */ |
157 | struct ip_vs_proto_data * |
158 | ip_vs_proto_data_get(struct netns_ipvs *ipvs, unsigned short proto) |
159 | { |
160 | struct ip_vs_proto_data *pd; |
161 | unsigned int hash = IP_VS_PROTO_HASH(proto); |
162 | |
163 | for (pd = ipvs->proto_data_table[hash]; pd; pd = pd->next) { |
164 | if (pd->pp->protocol == proto) |
165 | return pd; |
166 | } |
167 | |
168 | return NULL; |
169 | } |
170 | EXPORT_SYMBOL(ip_vs_proto_data_get); |
171 | |
172 | /* |
173 | * Propagate event for state change to all protocols |
174 | */ |
175 | void ip_vs_protocol_timeout_change(struct netns_ipvs *ipvs, int flags) |
176 | { |
177 | struct ip_vs_proto_data *pd; |
178 | int i; |
179 | |
180 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { |
181 | for (pd = ipvs->proto_data_table[i]; pd; pd = pd->next) { |
182 | if (pd->pp->timeout_change) |
183 | pd->pp->timeout_change(pd, flags); |
184 | } |
185 | } |
186 | } |
187 | |
188 | |
189 | int * |
190 | ip_vs_create_timeout_table(int *table, int size) |
191 | { |
192 | return kmemdup(p: table, size, GFP_KERNEL); |
193 | } |
194 | |
195 | |
196 | const char *ip_vs_state_name(const struct ip_vs_conn *cp) |
197 | { |
198 | unsigned int state = cp->state; |
199 | struct ip_vs_protocol *pp; |
200 | |
201 | if (cp->flags & IP_VS_CONN_F_TEMPLATE) { |
202 | |
203 | if (state >= IP_VS_CTPL_S_LAST) |
204 | return "ERR!" ; |
205 | return ip_vs_ctpl_state_name_table[state] ? : "?" ; |
206 | } |
207 | pp = ip_vs_proto_get(cp->protocol); |
208 | if (pp == NULL || pp->state_name == NULL) |
209 | return (cp->protocol == IPPROTO_IP) ? "NONE" : "ERR!" ; |
210 | return pp->state_name(state); |
211 | } |
212 | |
213 | |
214 | static void |
215 | ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, |
216 | const struct sk_buff *skb, |
217 | int offset, |
218 | const char *msg) |
219 | { |
220 | char buf[128]; |
221 | struct iphdr _iph, *ih; |
222 | |
223 | ih = skb_header_pointer(skb, offset, len: sizeof(_iph), buffer: &_iph); |
224 | if (ih == NULL) |
225 | sprintf(buf, fmt: "TRUNCATED" ); |
226 | else if (ih->frag_off & htons(IP_OFFSET)) |
227 | sprintf(buf, fmt: "%pI4->%pI4 frag" , &ih->saddr, &ih->daddr); |
228 | else { |
229 | __be16 _ports[2], *pptr; |
230 | |
231 | pptr = skb_header_pointer(skb, offset: offset + ih->ihl*4, |
232 | len: sizeof(_ports), buffer: _ports); |
233 | if (pptr == NULL) |
234 | sprintf(buf, fmt: "TRUNCATED %pI4->%pI4" , |
235 | &ih->saddr, &ih->daddr); |
236 | else |
237 | sprintf(buf, fmt: "%pI4:%u->%pI4:%u" , |
238 | &ih->saddr, ntohs(pptr[0]), |
239 | &ih->daddr, ntohs(pptr[1])); |
240 | } |
241 | |
242 | pr_debug("%s: %s %s\n" , msg, pp->name, buf); |
243 | } |
244 | |
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Emit a one-line pr_debug() description of an IPv6 packet.
 *
 *	@pp:     protocol whose name is included in the message
 *	@skb:    packet buffer
 *	@offset: offset of the IPv6 header inside @skb
 *	@msg:    caller supplied message prefix
 *
 *	The description is "TRUNCATED" when the IPv6 header cannot be read,
 *	"src->dst frag" when the next header is IPPROTO_FRAGMENT, and
 *	otherwise "src:port->dst:port" using the first two 16-bit words
 *	located sizeof(struct ipv6hdr) bytes after @offset (or
 *	"TRUNCATED src->dst" if those cannot be read).
 *	All formatting is bounded by the size of the local buffer.
 */
static void
ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp,
			     const struct sk_buff *skb,
			     int offset,
			     const char *msg)
{
	char buf[192];
	struct ipv6hdr _iph, *ih;

	ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph);
	if (ih == NULL) {
		snprintf(buf, sizeof(buf), "TRUNCATED");
	} else if (ih->nexthdr == IPPROTO_FRAGMENT) {
		snprintf(buf, sizeof(buf), "%pI6c->%pI6c frag",
			 &ih->saddr, &ih->daddr);
	} else {
		__be16 _ports[2], *pptr;

		pptr = skb_header_pointer(skb, offset + sizeof(struct ipv6hdr),
					  sizeof(_ports), _ports);
		if (pptr == NULL)
			snprintf(buf, sizeof(buf), "TRUNCATED %pI6c->%pI6c",
				 &ih->saddr, &ih->daddr);
		else
			snprintf(buf, sizeof(buf), "%pI6c:%u->%pI6c:%u",
				 &ih->saddr, ntohs(pptr[0]),
				 &ih->daddr, ntohs(pptr[1]));
	}

	pr_debug("%s: %s %s\n", msg, pp->name, buf);
}
#endif
277 | |
278 | |
/*
 *	Log a debug description of a packet, dispatching on address family:
 *	AF_INET6 goes to the IPv6 helper when CONFIG_IP_VS_IPV6 is enabled,
 *	every other case goes to the IPv4 helper.
 */
void
ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp,
			  const struct sk_buff *skb,
			  int offset,
			  const char *msg)
{
#ifdef CONFIG_IP_VS_IPV6
	if (af == AF_INET6) {
		ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg);
		return;
	}
#endif
	ip_vs_tcpudp_debug_packet_v4(pp, skb, offset, msg);
}
292 | |
293 | /* |
294 | * per network name-space init |
295 | */ |
296 | int __net_init ip_vs_protocol_net_init(struct netns_ipvs *ipvs) |
297 | { |
298 | int i, ret; |
299 | static struct ip_vs_protocol *protos[] = { |
300 | #ifdef CONFIG_IP_VS_PROTO_TCP |
301 | &ip_vs_protocol_tcp, |
302 | #endif |
303 | #ifdef CONFIG_IP_VS_PROTO_UDP |
304 | &ip_vs_protocol_udp, |
305 | #endif |
306 | #ifdef CONFIG_IP_VS_PROTO_SCTP |
307 | &ip_vs_protocol_sctp, |
308 | #endif |
309 | #ifdef CONFIG_IP_VS_PROTO_AH |
310 | &ip_vs_protocol_ah, |
311 | #endif |
312 | #ifdef CONFIG_IP_VS_PROTO_ESP |
313 | &ip_vs_protocol_esp, |
314 | #endif |
315 | }; |
316 | |
317 | for (i = 0; i < ARRAY_SIZE(protos); i++) { |
318 | ret = register_ip_vs_proto_netns(ipvs, pp: protos[i]); |
319 | if (ret < 0) |
320 | goto cleanup; |
321 | } |
322 | return 0; |
323 | |
324 | cleanup: |
325 | ip_vs_protocol_net_cleanup(ipvs); |
326 | return ret; |
327 | } |
328 | |
329 | void __net_exit ip_vs_protocol_net_cleanup(struct netns_ipvs *ipvs) |
330 | { |
331 | struct ip_vs_proto_data *pd; |
332 | int i; |
333 | |
334 | /* unregister all the ipvs proto data for this netns */ |
335 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { |
336 | while ((pd = ipvs->proto_data_table[i]) != NULL) |
337 | unregister_ip_vs_proto_netns(ipvs, pd); |
338 | } |
339 | } |
340 | |
341 | int __init ip_vs_protocol_init(void) |
342 | { |
343 | char protocols[64]; |
344 | #define REGISTER_PROTOCOL(p) \ |
345 | do { \ |
346 | register_ip_vs_protocol(p); \ |
347 | strcat(protocols, ", "); \ |
348 | strcat(protocols, (p)->name); \ |
349 | } while (0) |
350 | |
351 | protocols[0] = '\0'; |
352 | protocols[2] = '\0'; |
353 | #ifdef CONFIG_IP_VS_PROTO_TCP |
354 | REGISTER_PROTOCOL(&ip_vs_protocol_tcp); |
355 | #endif |
356 | #ifdef CONFIG_IP_VS_PROTO_UDP |
357 | REGISTER_PROTOCOL(&ip_vs_protocol_udp); |
358 | #endif |
359 | #ifdef CONFIG_IP_VS_PROTO_SCTP |
360 | REGISTER_PROTOCOL(&ip_vs_protocol_sctp); |
361 | #endif |
362 | #ifdef CONFIG_IP_VS_PROTO_AH |
363 | REGISTER_PROTOCOL(&ip_vs_protocol_ah); |
364 | #endif |
365 | #ifdef CONFIG_IP_VS_PROTO_ESP |
366 | REGISTER_PROTOCOL(&ip_vs_protocol_esp); |
367 | #endif |
368 | pr_info("Registered protocols (%s)\n" , &protocols[2]); |
369 | |
370 | return 0; |
371 | } |
372 | |
373 | |
374 | void ip_vs_protocol_cleanup(void) |
375 | { |
376 | struct ip_vs_protocol *pp; |
377 | int i; |
378 | |
379 | /* unregister all the ipvs protocols */ |
380 | for (i = 0; i < IP_VS_PROTO_TAB_SIZE; i++) { |
381 | while ((pp = ip_vs_proto_table[i]) != NULL) |
382 | unregister_ip_vs_protocol(pp); |
383 | } |
384 | } |
385 | |