| 1 | /* |
| 2 | * Copyright (c) 2017 Mellanox Technologies Inc. All rights reserved. |
| 3 | * Copyright (c) 2010 Voltaire Inc. All rights reserved. |
| 4 | * |
| 5 | * This software is available to you under a choice of one of two |
| 6 | * licenses. You may choose to be licensed under the terms of the GNU |
| 7 | * General Public License (GPL) Version 2, available from the file |
| 8 | * COPYING in the main directory of this source tree, or the |
| 9 | * OpenIB.org BSD license below: |
| 10 | * |
| 11 | * Redistribution and use in source and binary forms, with or |
| 12 | * without modification, are permitted provided that the following |
| 13 | * conditions are met: |
| 14 | * |
| 15 | * - Redistributions of source code must retain the above |
| 16 | * copyright notice, this list of conditions and the following |
| 17 | * disclaimer. |
| 18 | * |
| 19 | * - Redistributions in binary form must reproduce the above |
| 20 | * copyright notice, this list of conditions and the following |
| 21 | * disclaimer in the documentation and/or other materials |
| 22 | * provided with the distribution. |
| 23 | * |
| 24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
| 25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
| 26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
| 27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
| 28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
| 29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
| 30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 31 | * SOFTWARE. |
| 32 | */ |
| 33 | |
| 34 | #define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__ |
| 35 | |
| 36 | #include <linux/export.h> |
| 37 | #include <net/netlink.h> |
| 38 | #include <net/net_namespace.h> |
| 39 | #include <net/netns/generic.h> |
| 40 | #include <net/sock.h> |
| 41 | #include <rdma/rdma_netlink.h> |
| 42 | #include <linux/module.h> |
| 43 | #include "core_priv.h" |
| 44 | |
| 45 | static struct { |
| 46 | const struct rdma_nl_cbs *cb_table; |
| 47 | /* Synchronizes between ongoing netlink commands and netlink client |
| 48 | * unregistration. |
| 49 | */ |
| 50 | struct rw_semaphore sem; |
| 51 | } rdma_nl_types[RDMA_NL_NUM_CLIENTS]; |
| 52 | |
| 53 | bool rdma_nl_chk_listeners(unsigned int group) |
| 54 | { |
| 55 | struct rdma_dev_net *rnet = rdma_net_to_dev_net(net: &init_net); |
| 56 | |
| 57 | return netlink_has_listeners(sk: rnet->nl_sock, group); |
| 58 | } |
| 59 | EXPORT_SYMBOL(rdma_nl_chk_listeners); |
| 60 | |
| 61 | static bool is_nl_msg_valid(unsigned int type, unsigned int op) |
| 62 | { |
| 63 | static const unsigned int max_num_ops[RDMA_NL_NUM_CLIENTS] = { |
| 64 | [RDMA_NL_IWCM] = RDMA_NL_IWPM_NUM_OPS, |
| 65 | [RDMA_NL_LS] = RDMA_NL_LS_NUM_OPS, |
| 66 | [RDMA_NL_NLDEV] = RDMA_NLDEV_NUM_OPS, |
| 67 | }; |
| 68 | |
| 69 | /* |
| 70 | * This BUILD_BUG_ON is intended to catch addition of new |
| 71 | * RDMA netlink protocol without updating the array above. |
| 72 | */ |
| 73 | BUILD_BUG_ON(RDMA_NL_NUM_CLIENTS != 6); |
| 74 | |
| 75 | if (type >= RDMA_NL_NUM_CLIENTS) |
| 76 | return false; |
| 77 | |
| 78 | return op < max_num_ops[type]; |
| 79 | } |
| 80 | |
| 81 | static const struct rdma_nl_cbs * |
| 82 | get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op) |
| 83 | { |
| 84 | const struct rdma_nl_cbs *cb_table; |
| 85 | |
| 86 | /* |
| 87 | * Currently only NLDEV client is supporting netlink commands in |
| 88 | * non init_net net namespace. |
| 89 | */ |
| 90 | if (sock_net(sk: skb->sk) != &init_net && type != RDMA_NL_NLDEV) |
| 91 | return NULL; |
| 92 | |
| 93 | cb_table = READ_ONCE(rdma_nl_types[type].cb_table); |
| 94 | if (!cb_table) { |
| 95 | /* |
| 96 | * Didn't get valid reference of the table, attempt module |
| 97 | * load once. |
| 98 | */ |
| 99 | up_read(sem: &rdma_nl_types[type].sem); |
| 100 | |
| 101 | request_module("rdma-netlink-subsys-%u" , type); |
| 102 | |
| 103 | down_read(sem: &rdma_nl_types[type].sem); |
| 104 | cb_table = READ_ONCE(rdma_nl_types[type].cb_table); |
| 105 | } |
| 106 | if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit)) |
| 107 | return NULL; |
| 108 | return cb_table; |
| 109 | } |
| 110 | |
| 111 | void rdma_nl_register(unsigned int index, |
| 112 | const struct rdma_nl_cbs cb_table[]) |
| 113 | { |
| 114 | if (WARN_ON(!is_nl_msg_valid(index, 0)) || |
| 115 | WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table))) |
| 116 | return; |
| 117 | |
| 118 | /* Pairs with the READ_ONCE in is_nl_valid() */ |
| 119 | smp_store_release(&rdma_nl_types[index].cb_table, cb_table); |
| 120 | } |
| 121 | EXPORT_SYMBOL(rdma_nl_register); |
| 122 | |
| 123 | void rdma_nl_unregister(unsigned int index) |
| 124 | { |
| 125 | down_write(sem: &rdma_nl_types[index].sem); |
| 126 | rdma_nl_types[index].cb_table = NULL; |
| 127 | up_write(sem: &rdma_nl_types[index].sem); |
| 128 | } |
| 129 | EXPORT_SYMBOL(rdma_nl_unregister); |
| 130 | |
| 131 | void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, |
| 132 | int len, int client, int op, int flags) |
| 133 | { |
| 134 | *nlh = nlmsg_put(skb, portid: 0, seq, RDMA_NL_GET_TYPE(client, op), payload: len, flags); |
| 135 | if (!*nlh) |
| 136 | return NULL; |
| 137 | return nlmsg_data(nlh: *nlh); |
| 138 | } |
| 139 | EXPORT_SYMBOL(ibnl_put_msg); |
| 140 | |
| 141 | int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, |
| 142 | int len, void *data, int type) |
| 143 | { |
| 144 | if (nla_put(skb, attrtype: type, attrlen: len, data)) { |
| 145 | nlmsg_cancel(skb, nlh); |
| 146 | return -EMSGSIZE; |
| 147 | } |
| 148 | return 0; |
| 149 | } |
| 150 | EXPORT_SYMBOL(ibnl_put_attr); |
| 151 | |
| 152 | static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, |
| 153 | struct netlink_ext_ack *extack) |
| 154 | { |
| 155 | int type = nlh->nlmsg_type; |
| 156 | unsigned int index = RDMA_NL_GET_CLIENT(type); |
| 157 | unsigned int op = RDMA_NL_GET_OP(type); |
| 158 | const struct rdma_nl_cbs *cb_table; |
| 159 | int err = -EINVAL; |
| 160 | |
| 161 | if (!is_nl_msg_valid(type: index, op)) |
| 162 | return -EINVAL; |
| 163 | |
| 164 | down_read(sem: &rdma_nl_types[index].sem); |
| 165 | cb_table = get_cb_table(skb, type: index, op); |
| 166 | if (!cb_table) |
| 167 | goto done; |
| 168 | |
| 169 | if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) && |
| 170 | !netlink_capable(skb, CAP_NET_ADMIN)) { |
| 171 | err = -EPERM; |
| 172 | goto done; |
| 173 | } |
| 174 | |
| 175 | /* |
| 176 | * LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't |
| 177 | * mistakenly call the .dump() function. |
| 178 | */ |
| 179 | if (index == RDMA_NL_LS) { |
| 180 | if (cb_table[op].doit) |
| 181 | err = cb_table[op].doit(skb, nlh, extack); |
| 182 | goto done; |
| 183 | } |
| 184 | /* FIXME: Convert IWCM to properly handle doit callbacks */ |
| 185 | if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) { |
| 186 | struct netlink_dump_control c = { |
| 187 | .dump = cb_table[op].dump, |
| 188 | }; |
| 189 | if (c.dump) |
| 190 | err = netlink_dump_start(ssk: skb->sk, skb, nlh, control: &c); |
| 191 | goto done; |
| 192 | } |
| 193 | |
| 194 | if (cb_table[op].doit) |
| 195 | err = cb_table[op].doit(skb, nlh, extack); |
| 196 | done: |
| 197 | up_read(sem: &rdma_nl_types[index].sem); |
| 198 | return err; |
| 199 | } |
| 200 | |
| 201 | /* |
| 202 | * This function is similar to netlink_rcv_skb with one exception: |
| 203 | * It calls to the callback for the netlink messages without NLM_F_REQUEST |
| 204 | * flag. These messages are intended for RDMA_NL_LS consumer, so it is allowed |
| 205 | * for that consumer only. |
| 206 | */ |
| 207 | static int rdma_nl_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, |
| 208 | struct nlmsghdr *, |
| 209 | struct netlink_ext_ack *)) |
| 210 | { |
| 211 | struct netlink_ext_ack extack = {}; |
| 212 | struct nlmsghdr *nlh; |
| 213 | int err; |
| 214 | |
| 215 | while (skb->len >= nlmsg_total_size(payload: 0)) { |
| 216 | int msglen; |
| 217 | |
| 218 | nlh = nlmsg_hdr(skb); |
| 219 | err = 0; |
| 220 | |
| 221 | if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) |
| 222 | return 0; |
| 223 | |
| 224 | /* |
| 225 | * Generally speaking, the only requests are handled |
| 226 | * by the kernel, but RDMA_NL_LS is different, because it |
| 227 | * runs backward netlink scheme. Kernel initiates messages |
| 228 | * and waits for reply with data to keep pathrecord cache |
| 229 | * in sync. |
| 230 | */ |
| 231 | if (!(nlh->nlmsg_flags & NLM_F_REQUEST) && |
| 232 | (RDMA_NL_GET_CLIENT(nlh->nlmsg_type) != RDMA_NL_LS)) |
| 233 | goto ack; |
| 234 | |
| 235 | /* Skip control messages */ |
| 236 | if (nlh->nlmsg_type < NLMSG_MIN_TYPE) |
| 237 | goto ack; |
| 238 | |
| 239 | err = cb(skb, nlh, &extack); |
| 240 | if (err == -EINTR) |
| 241 | goto skip; |
| 242 | |
| 243 | ack: |
| 244 | if (nlh->nlmsg_flags & NLM_F_ACK || err) |
| 245 | netlink_ack(in_skb: skb, nlh, err, extack: &extack); |
| 246 | |
| 247 | skip: |
| 248 | msglen = NLMSG_ALIGN(nlh->nlmsg_len); |
| 249 | if (msglen > skb->len) |
| 250 | msglen = skb->len; |
| 251 | skb_pull(skb, len: msglen); |
| 252 | } |
| 253 | |
| 254 | return 0; |
| 255 | } |
| 256 | |
| 257 | static void rdma_nl_rcv(struct sk_buff *skb) |
| 258 | { |
| 259 | rdma_nl_rcv_skb(skb, cb: &rdma_nl_rcv_msg); |
| 260 | } |
| 261 | |
| 262 | int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid) |
| 263 | { |
| 264 | struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); |
| 265 | int err; |
| 266 | |
| 267 | err = netlink_unicast(ssk: rnet->nl_sock, skb, portid: pid, MSG_DONTWAIT); |
| 268 | return (err < 0) ? err : 0; |
| 269 | } |
| 270 | EXPORT_SYMBOL(rdma_nl_unicast); |
| 271 | |
| 272 | int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid) |
| 273 | { |
| 274 | struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); |
| 275 | int err; |
| 276 | |
| 277 | err = netlink_unicast(ssk: rnet->nl_sock, skb, portid: pid, nonblock: 0); |
| 278 | return (err < 0) ? err : 0; |
| 279 | } |
| 280 | EXPORT_SYMBOL(rdma_nl_unicast_wait); |
| 281 | |
| 282 | int rdma_nl_multicast(struct net *net, struct sk_buff *skb, |
| 283 | unsigned int group, gfp_t flags) |
| 284 | { |
| 285 | struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); |
| 286 | |
| 287 | return nlmsg_multicast(sk: rnet->nl_sock, skb, portid: 0, group, flags); |
| 288 | } |
| 289 | EXPORT_SYMBOL(rdma_nl_multicast); |
| 290 | |
| 291 | void rdma_nl_init(void) |
| 292 | { |
| 293 | int idx; |
| 294 | |
| 295 | for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) |
| 296 | init_rwsem(&rdma_nl_types[idx].sem); |
| 297 | } |
| 298 | |
| 299 | void rdma_nl_exit(void) |
| 300 | { |
| 301 | int idx; |
| 302 | |
| 303 | for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) |
| 304 | WARN(rdma_nl_types[idx].cb_table, |
| 305 | "Netlink client %d wasn't released prior to unloading %s\n" , |
| 306 | idx, KBUILD_MODNAME); |
| 307 | } |
| 308 | |
| 309 | int rdma_nl_net_init(struct rdma_dev_net *rnet) |
| 310 | { |
| 311 | struct net *net = read_pnet(pnet: &rnet->net); |
| 312 | struct netlink_kernel_cfg cfg = { |
| 313 | .input = rdma_nl_rcv, |
| 314 | .flags = NL_CFG_F_NONROOT_RECV, |
| 315 | }; |
| 316 | struct sock *nls; |
| 317 | |
| 318 | nls = netlink_kernel_create(net, NETLINK_RDMA, cfg: &cfg); |
| 319 | if (!nls) |
| 320 | return -ENOMEM; |
| 321 | |
| 322 | nls->sk_sndtimeo = 10 * HZ; |
| 323 | rnet->nl_sock = nls; |
| 324 | return 0; |
| 325 | } |
| 326 | |
| 327 | void rdma_nl_net_exit(struct rdma_dev_net *rnet) |
| 328 | { |
| 329 | netlink_kernel_release(sk: rnet->nl_sock); |
| 330 | } |
| 331 | |
/* Module alias for the PF_NETLINK / NETLINK_RDMA protocol pair. */
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA);
| 333 | |