1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* -*- linux-c -*- |
3 | * sysctl_net_core.c: sysctl interface to net core subsystem. |
4 | * |
5 | * Begun April 1, 1996, Mike Shaver. |
6 | * Added /proc/sys/net/core directory entry (empty =) ). [MS] |
7 | */ |
8 | |
9 | #include <linux/filter.h> |
10 | #include <linux/mm.h> |
11 | #include <linux/sysctl.h> |
12 | #include <linux/module.h> |
13 | #include <linux/socket.h> |
14 | #include <linux/netdevice.h> |
15 | #include <linux/ratelimit.h> |
16 | #include <linux/vmalloc.h> |
17 | #include <linux/init.h> |
18 | #include <linux/slab.h> |
19 | #include <linux/sched/isolation.h> |
20 | |
21 | #include <net/ip.h> |
22 | #include <net/sock.h> |
23 | #include <net/net_ratelimit.h> |
24 | #include <net/busy_poll.h> |
25 | #include <net/pkt_sched.h> |
26 | #include <net/hotdata.h> |
27 | #include <net/rps.h> |
28 | |
29 | #include "dev.h" |
30 | |
/* Bounds referenced as .extra1/.extra2 by entries in net_core_table below. */
static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
static int min_mem_pcpu_rsv = SK_MEMORY_PCPU_RESERVE;

static int net_msg_warn;	/* Unused, but still a sysctl */

/* 0 (default): fallback tunnels are created in every netns;
 * 1: only in init_net; 2: nowhere.  Also settable via "fb_tunnels=" boot arg.
 */
int sysctl_fb_tunnels_only_for_init_net __read_mostly = 0;
EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);

/* 0 - Keep current behavior:
 *     IPv4: inherit all current settings from init_net
 *     IPv6: reset all settings to default
 * 1 - Both inherit all current settings from init_net
 * 2 - Both reset all settings to default
 * 3 - Both inherit all settings from current netns
 */
int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
51 | |
52 | #if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS) |
53 | static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos, |
54 | struct cpumask *mask) |
55 | { |
56 | char kbuf[128]; |
57 | int len; |
58 | |
59 | if (*ppos || !*lenp) { |
60 | *lenp = 0; |
61 | return; |
62 | } |
63 | |
64 | len = min(sizeof(kbuf) - 1, *lenp); |
65 | len = scnprintf(buf: kbuf, size: len, fmt: "%*pb" , cpumask_pr_args(mask)); |
66 | if (!len) { |
67 | *lenp = 0; |
68 | return; |
69 | } |
70 | |
71 | if (len < *lenp) |
72 | kbuf[len++] = '\n'; |
73 | memcpy(buffer, kbuf, len); |
74 | *lenp = len; |
75 | *ppos += len; |
76 | } |
77 | #endif |
78 | |
79 | #ifdef CONFIG_RPS |
80 | |
81 | static struct cpumask *rps_default_mask_cow_alloc(struct net *net) |
82 | { |
83 | struct cpumask *rps_default_mask; |
84 | |
85 | if (net->core.rps_default_mask) |
86 | return net->core.rps_default_mask; |
87 | |
88 | rps_default_mask = kzalloc(size: cpumask_size(), GFP_KERNEL); |
89 | if (!rps_default_mask) |
90 | return NULL; |
91 | |
92 | /* pairs with READ_ONCE in rx_queue_default_mask() */ |
93 | WRITE_ONCE(net->core.rps_default_mask, rps_default_mask); |
94 | return rps_default_mask; |
95 | } |
96 | |
97 | static int rps_default_mask_sysctl(struct ctl_table *table, int write, |
98 | void *buffer, size_t *lenp, loff_t *ppos) |
99 | { |
100 | struct net *net = (struct net *)table->data; |
101 | int err = 0; |
102 | |
103 | rtnl_lock(); |
104 | if (write) { |
105 | struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net); |
106 | |
107 | err = -ENOMEM; |
108 | if (!rps_default_mask) |
109 | goto done; |
110 | |
111 | err = cpumask_parse(buf: buffer, dstp: rps_default_mask); |
112 | if (err) |
113 | goto done; |
114 | |
115 | err = rps_cpumask_housekeeping(mask: rps_default_mask); |
116 | if (err) |
117 | goto done; |
118 | } else { |
119 | dump_cpumask(buffer, lenp, ppos, |
120 | mask: net->core.rps_default_mask ? : cpu_none_mask); |
121 | } |
122 | |
123 | done: |
124 | rtnl_unlock(); |
125 | return err; |
126 | } |
127 | |
128 | static int rps_sock_flow_sysctl(struct ctl_table *table, int write, |
129 | void *buffer, size_t *lenp, loff_t *ppos) |
130 | { |
131 | unsigned int orig_size, size; |
132 | int ret, i; |
133 | struct ctl_table tmp = { |
134 | .data = &size, |
135 | .maxlen = sizeof(size), |
136 | .mode = table->mode |
137 | }; |
138 | struct rps_sock_flow_table *orig_sock_table, *sock_table; |
139 | static DEFINE_MUTEX(sock_flow_mutex); |
140 | |
141 | mutex_lock(&sock_flow_mutex); |
142 | |
143 | orig_sock_table = rcu_dereference_protected( |
144 | net_hotdata.rps_sock_flow_table, |
145 | lockdep_is_held(&sock_flow_mutex)); |
146 | size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; |
147 | |
148 | ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); |
149 | |
150 | if (write) { |
151 | if (size) { |
152 | if (size > 1<<29) { |
153 | /* Enforce limit to prevent overflow */ |
154 | mutex_unlock(lock: &sock_flow_mutex); |
155 | return -EINVAL; |
156 | } |
157 | size = roundup_pow_of_two(size); |
158 | if (size != orig_size) { |
159 | sock_table = |
160 | vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); |
161 | if (!sock_table) { |
162 | mutex_unlock(lock: &sock_flow_mutex); |
163 | return -ENOMEM; |
164 | } |
165 | net_hotdata.rps_cpu_mask = |
166 | roundup_pow_of_two(nr_cpu_ids) - 1; |
167 | sock_table->mask = size - 1; |
168 | } else |
169 | sock_table = orig_sock_table; |
170 | |
171 | for (i = 0; i < size; i++) |
172 | sock_table->ents[i] = RPS_NO_CPU; |
173 | } else |
174 | sock_table = NULL; |
175 | |
176 | if (sock_table != orig_sock_table) { |
177 | rcu_assign_pointer(net_hotdata.rps_sock_flow_table, |
178 | sock_table); |
179 | if (sock_table) { |
180 | static_branch_inc(&rps_needed); |
181 | static_branch_inc(&rfs_needed); |
182 | } |
183 | if (orig_sock_table) { |
184 | static_branch_dec(&rps_needed); |
185 | static_branch_dec(&rfs_needed); |
186 | kvfree_rcu_mightsleep(orig_sock_table); |
187 | } |
188 | } |
189 | } |
190 | |
191 | mutex_unlock(lock: &sock_flow_mutex); |
192 | |
193 | return ret; |
194 | } |
195 | #endif /* CONFIG_RPS */ |
196 | |
197 | #ifdef CONFIG_NET_FLOW_LIMIT |
198 | static DEFINE_MUTEX(flow_limit_update_mutex); |
199 | |
200 | static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, |
201 | void *buffer, size_t *lenp, loff_t *ppos) |
202 | { |
203 | struct sd_flow_limit *cur; |
204 | struct softnet_data *sd; |
205 | cpumask_var_t mask; |
206 | int i, len, ret = 0; |
207 | |
208 | if (!alloc_cpumask_var(mask: &mask, GFP_KERNEL)) |
209 | return -ENOMEM; |
210 | |
211 | if (write) { |
212 | ret = cpumask_parse(buf: buffer, dstp: mask); |
213 | if (ret) |
214 | goto done; |
215 | |
216 | mutex_lock(&flow_limit_update_mutex); |
217 | len = sizeof(*cur) + netdev_flow_limit_table_len; |
218 | for_each_possible_cpu(i) { |
219 | sd = &per_cpu(softnet_data, i); |
220 | cur = rcu_dereference_protected(sd->flow_limit, |
221 | lockdep_is_held(&flow_limit_update_mutex)); |
222 | if (cur && !cpumask_test_cpu(cpu: i, cpumask: mask)) { |
223 | RCU_INIT_POINTER(sd->flow_limit, NULL); |
224 | kfree_rcu_mightsleep(cur); |
225 | } else if (!cur && cpumask_test_cpu(cpu: i, cpumask: mask)) { |
226 | cur = kzalloc_node(size: len, GFP_KERNEL, |
227 | cpu_to_node(cpu: i)); |
228 | if (!cur) { |
229 | /* not unwinding previous changes */ |
230 | ret = -ENOMEM; |
231 | goto write_unlock; |
232 | } |
233 | cur->num_buckets = netdev_flow_limit_table_len; |
234 | rcu_assign_pointer(sd->flow_limit, cur); |
235 | } |
236 | } |
237 | write_unlock: |
238 | mutex_unlock(lock: &flow_limit_update_mutex); |
239 | } else { |
240 | cpumask_clear(dstp: mask); |
241 | rcu_read_lock(); |
242 | for_each_possible_cpu(i) { |
243 | sd = &per_cpu(softnet_data, i); |
244 | if (rcu_dereference(sd->flow_limit)) |
245 | cpumask_set_cpu(cpu: i, dstp: mask); |
246 | } |
247 | rcu_read_unlock(); |
248 | |
249 | dump_cpumask(buffer, lenp, ppos, mask); |
250 | } |
251 | |
252 | done: |
253 | free_cpumask_var(mask); |
254 | return ret; |
255 | } |
256 | |
257 | static int flow_limit_table_len_sysctl(struct ctl_table *table, int write, |
258 | void *buffer, size_t *lenp, loff_t *ppos) |
259 | { |
260 | unsigned int old, *ptr; |
261 | int ret; |
262 | |
263 | mutex_lock(&flow_limit_update_mutex); |
264 | |
265 | ptr = table->data; |
266 | old = *ptr; |
267 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
268 | if (!ret && write && !is_power_of_2(n: *ptr)) { |
269 | *ptr = old; |
270 | ret = -EINVAL; |
271 | } |
272 | |
273 | mutex_unlock(lock: &flow_limit_update_mutex); |
274 | return ret; |
275 | } |
276 | #endif /* CONFIG_NET_FLOW_LIMIT */ |
277 | |
278 | #ifdef CONFIG_NET_SCHED |
279 | static int set_default_qdisc(struct ctl_table *table, int write, |
280 | void *buffer, size_t *lenp, loff_t *ppos) |
281 | { |
282 | char id[IFNAMSIZ]; |
283 | struct ctl_table tbl = { |
284 | .data = id, |
285 | .maxlen = IFNAMSIZ, |
286 | }; |
287 | int ret; |
288 | |
289 | qdisc_get_default(id, IFNAMSIZ); |
290 | |
291 | ret = proc_dostring(&tbl, write, buffer, lenp, ppos); |
292 | if (write && ret == 0) |
293 | ret = qdisc_set_default(id); |
294 | return ret; |
295 | } |
296 | #endif |
297 | |
298 | static int proc_do_dev_weight(struct ctl_table *table, int write, |
299 | void *buffer, size_t *lenp, loff_t *ppos) |
300 | { |
301 | static DEFINE_MUTEX(dev_weight_mutex); |
302 | int ret, weight; |
303 | |
304 | mutex_lock(&dev_weight_mutex); |
305 | ret = proc_dointvec(table, write, buffer, lenp, ppos); |
306 | if (!ret && write) { |
307 | weight = READ_ONCE(weight_p); |
308 | WRITE_ONCE(net_hotdata.dev_rx_weight, weight * dev_weight_rx_bias); |
309 | WRITE_ONCE(net_hotdata.dev_tx_weight, weight * dev_weight_tx_bias); |
310 | } |
311 | mutex_unlock(lock: &dev_weight_mutex); |
312 | |
313 | return ret; |
314 | } |
315 | |
316 | static int (struct ctl_table *table, int write, |
317 | void *buffer, size_t *lenp, loff_t *ppos) |
318 | { |
319 | struct ctl_table fake_table; |
320 | char buf[NETDEV_RSS_KEY_LEN * 3]; |
321 | |
322 | snprintf(buf, size: sizeof(buf), fmt: "%*phC" , NETDEV_RSS_KEY_LEN, netdev_rss_key); |
323 | fake_table.data = buf; |
324 | fake_table.maxlen = sizeof(buf); |
325 | return proc_dostring(&fake_table, write, buffer, lenp, ppos); |
326 | } |
327 | |
328 | #ifdef CONFIG_BPF_JIT |
/* Handler for net.core.bpf_jit_enable.  Writes require CAP_SYS_ADMIN and
 * are validated into a local copy first, so a rejected value never reaches
 * the live sysctl.  Value 2 (JIT debug mode) is only accepted when the
 * caller's creds pass bpf_dump_raw_ok(), and a warning is logged since it
 * exposes JIT images.  With CONFIG_BPF_JIT_ALWAYS_ON extra1 == extra2, in
 * which case a failed write also prints a one-time hint that the knob is
 * pinned to 1.
 */
static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	int ret, jit_enable = *(int *)table->data;
	int min = *(int *)table->extra1;
	int max = *(int *)table->extra2;
	struct ctl_table tmp = *table;

	if (write && !capable(CAP_SYS_ADMIN))
		return -EPERM;

	/* Run range validation against the local copy, not the live value. */
	tmp.data = &jit_enable;
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	if (write && !ret) {
		if (jit_enable < 2 ||
		    (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) {
			*(int *)table->data = jit_enable;
			if (jit_enable == 2)
				pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n");
		} else {
			ret = -EPERM;
		}
	}

	if (write && ret && min == max)
		pr_info_once("CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1.\n");

	return ret;
}
359 | |
360 | # ifdef CONFIG_HAVE_EBPF_JIT |
361 | static int |
362 | proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, |
363 | void *buffer, size_t *lenp, loff_t *ppos) |
364 | { |
365 | if (!capable(CAP_SYS_ADMIN)) |
366 | return -EPERM; |
367 | |
368 | return proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
369 | } |
370 | # endif /* CONFIG_HAVE_EBPF_JIT */ |
371 | |
372 | static int |
373 | proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, |
374 | void *buffer, size_t *lenp, loff_t *ppos) |
375 | { |
376 | if (!capable(CAP_SYS_ADMIN)) |
377 | return -EPERM; |
378 | |
379 | return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); |
380 | } |
381 | #endif |
382 | |
/* Global (non-per-netns) net.core sysctls, registered once against
 * init_net in sysctl_core_init().
 */
static struct ctl_table net_core_table[] = {
	{
		.procname	= "wmem_max",
		.data		= &sysctl_wmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_max",
		.data		= &sysctl_rmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
	{
		.procname	= "wmem_default",
		.data		= &sysctl_wmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_sndbuf,
	},
	{
		.procname	= "rmem_default",
		.data		= &sysctl_rmem_default,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_rcvbuf,
	},
	{
		.procname	= "mem_pcpu_rsv",
		.data		= &sysctl_mem_pcpu_rsv,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= &min_mem_pcpu_rsv,
	},
	/* The three dev_weight knobs share proc_do_dev_weight so the derived
	 * rx/tx budgets are recomputed on every write.
	 */
	{
		.procname	= "dev_weight",
		.data		= &weight_p,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "dev_weight_rx_bias",
		.data		= &dev_weight_rx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "dev_weight_tx_bias",
		.data		= &dev_weight_tx_bias,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_do_dev_weight,
	},
	{
		.procname	= "netdev_max_backlog",
		.data		= &net_hotdata.max_backlog,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "netdev_rss_key",
		.data		= &netdev_rss_key,
		.maxlen		= sizeof(int),
		.mode		= 0444,	/* read-only */
		.proc_handler	= proc_do_rss_key,
	},
#ifdef CONFIG_BPF_JIT
	{
		.procname	= "bpf_jit_enable",
		.data		= &bpf_jit_enable,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax_bpf_enable,
# ifdef CONFIG_BPF_JIT_ALWAYS_ON
		/* Pinned to 1 when the JIT cannot be disabled. */
		.extra1		= SYSCTL_ONE,
		.extra2		= SYSCTL_ONE,
# else
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
# endif
	},
# ifdef CONFIG_HAVE_EBPF_JIT
	{
		.procname	= "bpf_jit_harden",
		.data		= &bpf_jit_harden,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "bpf_jit_kallsyms",
		.data		= &bpf_jit_kallsyms,
		.maxlen		= sizeof(int),
		.mode		= 0600,
		.proc_handler	= proc_dointvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
	},
# endif
	{
		.procname	= "bpf_jit_limit",
		.data		= &bpf_jit_limit,
		.maxlen		= sizeof(long),
		.mode		= 0600,
		.proc_handler	= proc_dolongvec_minmax_bpf_restricted,
		.extra1		= SYSCTL_LONG_ONE,
		.extra2		= &bpf_jit_limit_max,
	},
#endif
	{
		.procname	= "netdev_tstamp_prequeue",
		.data		= &net_hotdata.tstamp_prequeue,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "message_cost",
		.data		= &net_ratelimit_state.interval,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_jiffies,
	},
	{
		.procname	= "message_burst",
		.data		= &net_ratelimit_state.burst,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{
		.procname	= "tstamp_allow_data",
		.data		= &sysctl_tstamp_allow_data,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE
	},
#ifdef CONFIG_RPS
	/* No .data: rps_sock_flow_sysctl manages the global flow table. */
	{
		.procname	= "rps_sock_flow_entries",
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= rps_sock_flow_sysctl
	},
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
	/* No .data: flow_limit_cpu_sysctl reconstructs the mask itself. */
	{
		.procname	= "flow_limit_cpu_bitmap",
		.mode		= 0644,
		.proc_handler	= flow_limit_cpu_sysctl
	},
	{
		.procname	= "flow_limit_table_len",
		.data		= &netdev_flow_limit_table_len,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= flow_limit_table_len_sysctl
	},
#endif /* CONFIG_NET_FLOW_LIMIT */
#ifdef CONFIG_NET_RX_BUSY_POLL
	{
		.procname	= "busy_poll",
		.data		= &sysctl_net_busy_poll,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "busy_read",
		.data		= &sysctl_net_busy_read,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
#endif
#ifdef CONFIG_NET_SCHED
	{
		.procname	= "default_qdisc",
		.mode		= 0644,
		.maxlen		= IFNAMSIZ,
		.proc_handler	= set_default_qdisc
	},
#endif
	{
		.procname	= "netdev_budget",
		.data		= &net_hotdata.netdev_budget,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "warnings",
		.data		= &net_msg_warn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec
	},
	{
		.procname	= "max_skb_frags",
		.data		= &sysctl_max_skb_frags,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &max_skb_frags,
	},
	{
		.procname	= "netdev_budget_usecs",
		.data		= &net_hotdata.netdev_budget_usecs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{
		.procname	= "fb_tunnels_only_for_init_net",
		.data		= &sysctl_fb_tunnels_only_for_init_net,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_TWO,
	},
	{
		.procname	= "devconf_inherit_init_net",
		.data		= &sysctl_devconf_inherit_init_net,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_THREE,
	},
	{
		.procname	= "high_order_alloc_disable",
		.data		= &net_high_order_alloc_disable_key.key,
		.maxlen		= sizeof(net_high_order_alloc_disable_key),
		.mode		= 0644,
		.proc_handler	= proc_do_static_key,
	},
	{
		.procname	= "gro_normal_batch",
		.data		= &net_hotdata.gro_normal_batch,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
	},
	{
		.procname	= "netdev_unregister_timeout_secs",
		.data		= &netdev_unregister_timeout_secs,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ONE,
		.extra2		= &int_3600,
	},
	{
		.procname	= "skb_defer_max",
		.data		= &sysctl_skb_defer_max,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec_minmax,
		.extra1		= SYSCTL_ZERO,
	},
	{ }	/* sentinel */
};
665 | |
/* Per-netns net.core sysctls.  For non-init netns the table is duplicated
 * and each .data pointer is rebased in sysctl_core_net_init().
 */
static struct ctl_table netns_core_table[] = {
#if IS_ENABLED(CONFIG_RPS)
	/* .data holds the struct net pointer itself, not a field. */
	{
		.procname	= "rps_default_mask",
		.data		= &init_net,
		.mode		= 0644,
		.proc_handler	= rps_default_mask_sysctl
	},
#endif
	{
		.procname	= "somaxconn",
		.data		= &init_net.core.sysctl_somaxconn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.proc_handler	= proc_dointvec_minmax
	},
	{
		.procname	= "optmem_max",
		.data		= &init_net.core.sysctl_optmem_max,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.proc_handler	= proc_dointvec_minmax
	},
	{
		.procname	= "txrehash",
		.data		= &init_net.core.sysctl_txrehash,
		.maxlen		= sizeof(u8),
		.mode		= 0644,
		.extra1		= SYSCTL_ZERO,
		.extra2		= SYSCTL_ONE,
		.proc_handler	= proc_dou8vec_minmax,
	},
	{ }	/* sentinel */
};
702 | |
703 | static int __init fb_tunnels_only_for_init_net_sysctl_setup(char *str) |
704 | { |
705 | /* fallback tunnels for initns only */ |
706 | if (!strncmp(str, "initns" , 6)) |
707 | sysctl_fb_tunnels_only_for_init_net = 1; |
708 | /* no fallback tunnels anywhere */ |
709 | else if (!strncmp(str, "none" , 4)) |
710 | sysctl_fb_tunnels_only_for_init_net = 2; |
711 | |
712 | return 1; |
713 | } |
714 | __setup("fb_tunnels=" , fb_tunnels_only_for_init_net_sysctl_setup); |
715 | |
716 | static __net_init int sysctl_core_net_init(struct net *net) |
717 | { |
718 | struct ctl_table *tbl, *tmp; |
719 | |
720 | tbl = netns_core_table; |
721 | if (!net_eq(net1: net, net2: &init_net)) { |
722 | tbl = kmemdup(p: tbl, size: sizeof(netns_core_table), GFP_KERNEL); |
723 | if (tbl == NULL) |
724 | goto err_dup; |
725 | |
726 | for (tmp = tbl; tmp->procname; tmp++) |
727 | tmp->data += (char *)net - (char *)&init_net; |
728 | } |
729 | |
730 | net->core.sysctl_hdr = register_net_sysctl_sz(net, path: "net/core" , table: tbl, |
731 | ARRAY_SIZE(netns_core_table)); |
732 | if (net->core.sysctl_hdr == NULL) |
733 | goto err_reg; |
734 | |
735 | return 0; |
736 | |
737 | err_reg: |
738 | if (tbl != netns_core_table) |
739 | kfree(objp: tbl); |
740 | err_dup: |
741 | return -ENOMEM; |
742 | } |
743 | |
744 | static __net_exit void sysctl_core_net_exit(struct net *net) |
745 | { |
746 | struct ctl_table *tbl; |
747 | |
748 | tbl = net->core.sysctl_hdr->ctl_table_arg; |
749 | unregister_net_sysctl_table(header: net->core.sysctl_hdr); |
750 | BUG_ON(tbl == netns_core_table); |
751 | #if IS_ENABLED(CONFIG_RPS) |
752 | kfree(objp: net->core.rps_default_mask); |
753 | #endif |
754 | kfree(objp: tbl); |
755 | } |
756 | |
/* Pernet hooks: run for every netns created/destroyed after registration. */
static __net_initdata struct pernet_operations sysctl_core_ops = {
	.init = sysctl_core_net_init,
	.exit = sysctl_core_net_exit,
};
761 | |
/* Boot-time setup: register the global net.core table against init_net,
 * then hook the per-netns init/exit callbacks for all namespaces.
 */
static __init int sysctl_core_init(void)
{
	register_net_sysctl(&init_net, "net/core", net_core_table);
	return register_pernet_subsys(&sysctl_core_ops);
}

fs_initcall(sysctl_core_init);
769 | |