| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE |
| 4 | * |
| 5 | * smc_sysctl.c: sysctl interface to SMC subsystem. |
| 6 | * |
| 7 | * Copyright (c) 2022, Alibaba Inc. |
| 8 | * |
| 9 | * Author: Tony Lu <tonylu@linux.alibaba.com> |
| 10 | * |
| 11 | */ |
| 12 | |
| 13 | #include <linux/init.h> |
| 14 | #include <linux/sysctl.h> |
| 15 | #include <linux/bpf.h> |
| 16 | #include <net/net_namespace.h> |
| 17 | |
| 18 | #include "smc.h" |
| 19 | #include "smc_core.h" |
| 20 | #include "smc_llc.h" |
| 21 | #include "smc_sysctl.h" |
| 22 | #include "smc_hs_bpf.h" |
| 23 | |
| 24 | static int min_sndbuf = SMC_BUF_MIN_SIZE; |
| 25 | static int min_rcvbuf = SMC_BUF_MIN_SIZE; |
| 26 | static int max_sndbuf = INT_MAX / 2; |
| 27 | static int max_rcvbuf = INT_MAX / 2; |
| 28 | static const int net_smc_wmem_init = (64 * 1024); |
| 29 | static const int net_smc_rmem_init = (64 * 1024); |
| 30 | static int links_per_lgr_min = SMC_LINKS_ADD_LNK_MIN; |
| 31 | static int links_per_lgr_max = SMC_LINKS_ADD_LNK_MAX; |
| 32 | static int conns_per_lgr_min = SMC_CONN_PER_LGR_MIN; |
| 33 | static int conns_per_lgr_max = SMC_CONN_PER_LGR_MAX; |
| 34 | static unsigned int smcr_max_wr_min = 2; |
| 35 | static unsigned int smcr_max_wr_max = 2048; |
| 36 | |
| 37 | #if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) |
| 38 | static int smc_net_replace_smc_hs_ctrl(struct net *net, const char *name) |
| 39 | { |
| 40 | struct smc_hs_ctrl *ctrl = NULL; |
| 41 | |
| 42 | rcu_read_lock(); |
| 43 | /* null or empty name ask to clear current ctrl */ |
| 44 | if (name && name[0]) { |
| 45 | ctrl = smc_hs_ctrl_find_by_name(name); |
| 46 | if (!ctrl) { |
| 47 | rcu_read_unlock(); |
| 48 | return -EINVAL; |
| 49 | } |
| 50 | /* no change, just return */ |
| 51 | if (ctrl == rcu_dereference(net->smc.hs_ctrl)) { |
| 52 | rcu_read_unlock(); |
| 53 | return 0; |
| 54 | } |
| 55 | if (!bpf_try_module_get(data: ctrl, owner: ctrl->owner)) { |
| 56 | rcu_read_unlock(); |
| 57 | return -EBUSY; |
| 58 | } |
| 59 | } |
| 60 | /* xhcg old ctrl with the new one atomically */ |
| 61 | ctrl = unrcu_pointer(xchg(&net->smc.hs_ctrl, RCU_INITIALIZER(ctrl))); |
| 62 | /* release old ctrl */ |
| 63 | if (ctrl) |
| 64 | bpf_module_put(data: ctrl, owner: ctrl->owner); |
| 65 | |
| 66 | rcu_read_unlock(); |
| 67 | return 0; |
| 68 | } |
| 69 | |
| 70 | static int proc_smc_hs_ctrl(const struct ctl_table *ctl, int write, |
| 71 | void *buffer, size_t *lenp, loff_t *ppos) |
| 72 | { |
| 73 | struct net *net = container_of(ctl->data, struct net, smc.hs_ctrl); |
| 74 | char val[SMC_HS_CTRL_NAME_MAX]; |
| 75 | const struct ctl_table tbl = { |
| 76 | .data = val, |
| 77 | .maxlen = SMC_HS_CTRL_NAME_MAX, |
| 78 | }; |
| 79 | struct smc_hs_ctrl *ctrl; |
| 80 | int ret; |
| 81 | |
| 82 | rcu_read_lock(); |
| 83 | ctrl = rcu_dereference(net->smc.hs_ctrl); |
| 84 | if (ctrl) |
| 85 | memcpy(val, ctrl->name, sizeof(ctrl->name)); |
| 86 | else |
| 87 | val[0] = '\0'; |
| 88 | rcu_read_unlock(); |
| 89 | |
| 90 | ret = proc_dostring(&tbl, write, buffer, lenp, ppos); |
| 91 | if (ret) |
| 92 | return ret; |
| 93 | |
| 94 | if (write) |
| 95 | ret = smc_net_replace_smc_hs_ctrl(net, name: val); |
| 96 | return ret; |
| 97 | } |
| 98 | #endif /* CONFIG_SMC_HS_CTRL_BPF */ |
| 99 | |
| 100 | static struct ctl_table smc_table[] = { |
| 101 | { |
| 102 | .procname = "autocorking_size" , |
| 103 | .data = &init_net.smc.sysctl_autocorking_size, |
| 104 | .maxlen = sizeof(unsigned int), |
| 105 | .mode = 0644, |
| 106 | .proc_handler = proc_douintvec, |
| 107 | }, |
| 108 | { |
| 109 | .procname = "smcr_buf_type" , |
| 110 | .data = &init_net.smc.sysctl_smcr_buf_type, |
| 111 | .maxlen = sizeof(unsigned int), |
| 112 | .mode = 0644, |
| 113 | .proc_handler = proc_douintvec_minmax, |
| 114 | .extra1 = SYSCTL_ZERO, |
| 115 | .extra2 = SYSCTL_TWO, |
| 116 | }, |
| 117 | { |
| 118 | .procname = "smcr_testlink_time" , |
| 119 | .data = &init_net.smc.sysctl_smcr_testlink_time, |
| 120 | .maxlen = sizeof(int), |
| 121 | .mode = 0644, |
| 122 | .proc_handler = proc_dointvec_jiffies, |
| 123 | }, |
| 124 | { |
| 125 | .procname = "wmem" , |
| 126 | .data = &init_net.smc.sysctl_wmem, |
| 127 | .maxlen = sizeof(int), |
| 128 | .mode = 0644, |
| 129 | .proc_handler = proc_dointvec_minmax, |
| 130 | .extra1 = &min_sndbuf, |
| 131 | .extra2 = &max_sndbuf, |
| 132 | }, |
| 133 | { |
| 134 | .procname = "rmem" , |
| 135 | .data = &init_net.smc.sysctl_rmem, |
| 136 | .maxlen = sizeof(int), |
| 137 | .mode = 0644, |
| 138 | .proc_handler = proc_dointvec_minmax, |
| 139 | .extra1 = &min_rcvbuf, |
| 140 | .extra2 = &max_rcvbuf, |
| 141 | }, |
| 142 | { |
| 143 | .procname = "smcr_max_links_per_lgr" , |
| 144 | .data = &init_net.smc.sysctl_max_links_per_lgr, |
| 145 | .maxlen = sizeof(int), |
| 146 | .mode = 0644, |
| 147 | .proc_handler = proc_dointvec_minmax, |
| 148 | .extra1 = &links_per_lgr_min, |
| 149 | .extra2 = &links_per_lgr_max, |
| 150 | }, |
| 151 | { |
| 152 | .procname = "smcr_max_conns_per_lgr" , |
| 153 | .data = &init_net.smc.sysctl_max_conns_per_lgr, |
| 154 | .maxlen = sizeof(int), |
| 155 | .mode = 0644, |
| 156 | .proc_handler = proc_dointvec_minmax, |
| 157 | .extra1 = &conns_per_lgr_min, |
| 158 | .extra2 = &conns_per_lgr_max, |
| 159 | }, |
| 160 | { |
| 161 | .procname = "limit_smc_hs" , |
| 162 | .data = &init_net.smc.limit_smc_hs, |
| 163 | .maxlen = sizeof(int), |
| 164 | .mode = 0644, |
| 165 | .proc_handler = proc_dointvec_minmax, |
| 166 | .extra1 = SYSCTL_ZERO, |
| 167 | .extra2 = SYSCTL_ONE, |
| 168 | }, |
| 169 | { |
| 170 | .procname = "smcr_max_send_wr" , |
| 171 | .data = &init_net.smc.sysctl_smcr_max_send_wr, |
| 172 | .maxlen = sizeof(int), |
| 173 | .mode = 0644, |
| 174 | .proc_handler = proc_dointvec_minmax, |
| 175 | .extra1 = &smcr_max_wr_min, |
| 176 | .extra2 = &smcr_max_wr_max, |
| 177 | }, |
| 178 | { |
| 179 | .procname = "smcr_max_recv_wr" , |
| 180 | .data = &init_net.smc.sysctl_smcr_max_recv_wr, |
| 181 | .maxlen = sizeof(int), |
| 182 | .mode = 0644, |
| 183 | .proc_handler = proc_dointvec_minmax, |
| 184 | .extra1 = &smcr_max_wr_min, |
| 185 | .extra2 = &smcr_max_wr_max, |
| 186 | }, |
| 187 | #if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) |
| 188 | { |
| 189 | .procname = "hs_ctrl" , |
| 190 | .data = &init_net.smc.hs_ctrl, |
| 191 | .mode = 0644, |
| 192 | .maxlen = SMC_HS_CTRL_NAME_MAX, |
| 193 | .proc_handler = proc_smc_hs_ctrl, |
| 194 | }, |
| 195 | #endif /* CONFIG_SMC_HS_CTRL_BPF */ |
| 196 | }; |
| 197 | |
| 198 | int __net_init smc_sysctl_net_init(struct net *net) |
| 199 | { |
| 200 | size_t table_size = ARRAY_SIZE(smc_table); |
| 201 | struct ctl_table *table; |
| 202 | |
| 203 | table = smc_table; |
| 204 | if (!net_eq(net1: net, net2: &init_net)) { |
| 205 | int i; |
| 206 | #if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) |
| 207 | struct smc_hs_ctrl *ctrl; |
| 208 | |
| 209 | rcu_read_lock(); |
| 210 | ctrl = rcu_dereference(init_net.smc.hs_ctrl); |
| 211 | if (ctrl && ctrl->flags & SMC_HS_CTRL_FLAG_INHERITABLE && |
| 212 | bpf_try_module_get(data: ctrl, owner: ctrl->owner)) |
| 213 | rcu_assign_pointer(net->smc.hs_ctrl, ctrl); |
| 214 | rcu_read_unlock(); |
| 215 | #endif /* CONFIG_SMC_HS_CTRL_BPF */ |
| 216 | |
| 217 | table = kmemdup(table, sizeof(smc_table), GFP_KERNEL); |
| 218 | if (!table) |
| 219 | goto err_alloc; |
| 220 | |
| 221 | for (i = 0; i < table_size; i++) |
| 222 | table[i].data += (void *)net - (void *)&init_net; |
| 223 | } |
| 224 | |
| 225 | net->smc.smc_hdr = register_net_sysctl_sz(net, path: "net/smc" , table, |
| 226 | table_size); |
| 227 | if (!net->smc.smc_hdr) |
| 228 | goto err_reg; |
| 229 | |
| 230 | net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; |
| 231 | net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS; |
| 232 | net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; |
| 233 | WRITE_ONCE(net->smc.sysctl_wmem, net_smc_wmem_init); |
| 234 | WRITE_ONCE(net->smc.sysctl_rmem, net_smc_rmem_init); |
| 235 | net->smc.sysctl_max_links_per_lgr = SMC_LINKS_PER_LGR_MAX_PREFER; |
| 236 | net->smc.sysctl_max_conns_per_lgr = SMC_CONN_PER_LGR_PREFER; |
| 237 | net->smc.sysctl_smcr_max_send_wr = SMCR_MAX_SEND_WR_DEF; |
| 238 | net->smc.sysctl_smcr_max_recv_wr = SMCR_MAX_RECV_WR_DEF; |
| 239 | /* disable handshake limitation by default */ |
| 240 | net->smc.limit_smc_hs = 0; |
| 241 | |
| 242 | return 0; |
| 243 | |
| 244 | err_reg: |
| 245 | if (!net_eq(net1: net, net2: &init_net)) |
| 246 | kfree(objp: table); |
| 247 | err_alloc: |
| 248 | #if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) |
| 249 | smc_net_replace_smc_hs_ctrl(net, NULL); |
| 250 | #endif /* CONFIG_SMC_HS_CTRL_BPF */ |
| 251 | return -ENOMEM; |
| 252 | } |
| 253 | |
| 254 | void __net_exit smc_sysctl_net_exit(struct net *net) |
| 255 | { |
| 256 | const struct ctl_table *table; |
| 257 | |
| 258 | table = net->smc.smc_hdr->ctl_table_arg; |
| 259 | unregister_net_sysctl_table(header: net->smc.smc_hdr); |
| 260 | #if IS_ENABLED(CONFIG_SMC_HS_CTRL_BPF) |
| 261 | smc_net_replace_smc_hs_ctrl(net, NULL); |
| 262 | #endif /* CONFIG_SMC_HS_CTRL_BPF */ |
| 263 | |
| 264 | if (!net_eq(net1: net, net2: &init_net)) |
| 265 | kfree(objp: table); |
| 266 | } |
| 267 | |