1 | /* |
2 | * Copyright (c) 2017 Mellanox Technologies Inc. All rights reserved. |
3 | * Copyright (c) 2010 Voltaire Inc. All rights reserved. |
4 | * |
5 | * This software is available to you under a choice of one of two |
6 | * licenses. You may choose to be licensed under the terms of the GNU |
7 | * General Public License (GPL) Version 2, available from the file |
8 | * COPYING in the main directory of this source tree, or the |
9 | * OpenIB.org BSD license below: |
10 | * |
11 | * Redistribution and use in source and binary forms, with or |
12 | * without modification, are permitted provided that the following |
13 | * conditions are met: |
14 | * |
15 | * - Redistributions of source code must retain the above |
16 | * copyright notice, this list of conditions and the following |
17 | * disclaimer. |
18 | * |
19 | * - Redistributions in binary form must reproduce the above |
20 | * copyright notice, this list of conditions and the following |
21 | * disclaimer in the documentation and/or other materials |
22 | * provided with the distribution. |
23 | * |
24 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
25 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
26 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
27 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
28 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
29 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
30 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
31 | * SOFTWARE. |
32 | */ |
33 | |
/*
 * Message prefix template: expands to a "<module name>:<function name>: "
 * prefix followed by the caller-supplied format string.
 */
#define pr_fmt(fmt) "%s:%s: " fmt, KBUILD_MODNAME, __func__
35 | |
36 | #include <linux/export.h> |
37 | #include <net/netlink.h> |
38 | #include <net/net_namespace.h> |
39 | #include <net/netns/generic.h> |
40 | #include <net/sock.h> |
41 | #include <rdma/rdma_netlink.h> |
42 | #include <linux/module.h> |
43 | #include "core_priv.h" |
44 | |
/*
 * Per-client registration state, indexed by RDMA netlink client number
 * (one slot for each of the RDMA_NL_NUM_CLIENTS clients).
 */
static struct {
	/*
	 * Callback table of the registered client. Published by
	 * rdma_nl_register() and reset to NULL by rdma_nl_unregister().
	 */
	const struct rdma_nl_cbs *cb_table;
	/* Synchronizes between ongoing netlink commands and netlink client
	 * unregistration.
	 */
	struct rw_semaphore sem;
} rdma_nl_types[RDMA_NL_NUM_CLIENTS];
52 | |
53 | bool rdma_nl_chk_listeners(unsigned int group) |
54 | { |
55 | struct rdma_dev_net *rnet = rdma_net_to_dev_net(net: &init_net); |
56 | |
57 | return netlink_has_listeners(sk: rnet->nl_sock, group); |
58 | } |
59 | EXPORT_SYMBOL(rdma_nl_chk_listeners); |
60 | |
61 | static bool is_nl_msg_valid(unsigned int type, unsigned int op) |
62 | { |
63 | static const unsigned int max_num_ops[RDMA_NL_NUM_CLIENTS] = { |
64 | [RDMA_NL_IWCM] = RDMA_NL_IWPM_NUM_OPS, |
65 | [RDMA_NL_LS] = RDMA_NL_LS_NUM_OPS, |
66 | [RDMA_NL_NLDEV] = RDMA_NLDEV_NUM_OPS, |
67 | }; |
68 | |
69 | /* |
70 | * This BUILD_BUG_ON is intended to catch addition of new |
71 | * RDMA netlink protocol without updating the array above. |
72 | */ |
73 | BUILD_BUG_ON(RDMA_NL_NUM_CLIENTS != 6); |
74 | |
75 | if (type >= RDMA_NL_NUM_CLIENTS) |
76 | return false; |
77 | |
78 | return op < max_num_ops[type]; |
79 | } |
80 | |
81 | static const struct rdma_nl_cbs * |
82 | get_cb_table(const struct sk_buff *skb, unsigned int type, unsigned int op) |
83 | { |
84 | const struct rdma_nl_cbs *cb_table; |
85 | |
86 | /* |
87 | * Currently only NLDEV client is supporting netlink commands in |
88 | * non init_net net namespace. |
89 | */ |
90 | if (sock_net(sk: skb->sk) != &init_net && type != RDMA_NL_NLDEV) |
91 | return NULL; |
92 | |
93 | cb_table = READ_ONCE(rdma_nl_types[type].cb_table); |
94 | if (!cb_table) { |
95 | /* |
96 | * Didn't get valid reference of the table, attempt module |
97 | * load once. |
98 | */ |
99 | up_read(sem: &rdma_nl_types[type].sem); |
100 | |
101 | request_module("rdma-netlink-subsys-%u" , type); |
102 | |
103 | down_read(sem: &rdma_nl_types[type].sem); |
104 | cb_table = READ_ONCE(rdma_nl_types[type].cb_table); |
105 | } |
106 | if (!cb_table || (!cb_table[op].dump && !cb_table[op].doit)) |
107 | return NULL; |
108 | return cb_table; |
109 | } |
110 | |
111 | void rdma_nl_register(unsigned int index, |
112 | const struct rdma_nl_cbs cb_table[]) |
113 | { |
114 | if (WARN_ON(!is_nl_msg_valid(index, 0)) || |
115 | WARN_ON(READ_ONCE(rdma_nl_types[index].cb_table))) |
116 | return; |
117 | |
118 | /* Pairs with the READ_ONCE in is_nl_valid() */ |
119 | smp_store_release(&rdma_nl_types[index].cb_table, cb_table); |
120 | } |
121 | EXPORT_SYMBOL(rdma_nl_register); |
122 | |
123 | void rdma_nl_unregister(unsigned int index) |
124 | { |
125 | down_write(sem: &rdma_nl_types[index].sem); |
126 | rdma_nl_types[index].cb_table = NULL; |
127 | up_write(sem: &rdma_nl_types[index].sem); |
128 | } |
129 | EXPORT_SYMBOL(rdma_nl_unregister); |
130 | |
131 | void *ibnl_put_msg(struct sk_buff *skb, struct nlmsghdr **nlh, int seq, |
132 | int len, int client, int op, int flags) |
133 | { |
134 | *nlh = nlmsg_put(skb, portid: 0, seq, RDMA_NL_GET_TYPE(client, op), payload: len, flags); |
135 | if (!*nlh) |
136 | return NULL; |
137 | return nlmsg_data(nlh: *nlh); |
138 | } |
139 | EXPORT_SYMBOL(ibnl_put_msg); |
140 | |
141 | int ibnl_put_attr(struct sk_buff *skb, struct nlmsghdr *nlh, |
142 | int len, void *data, int type) |
143 | { |
144 | if (nla_put(skb, attrtype: type, attrlen: len, data)) { |
145 | nlmsg_cancel(skb, nlh); |
146 | return -EMSGSIZE; |
147 | } |
148 | return 0; |
149 | } |
150 | EXPORT_SYMBOL(ibnl_put_attr); |
151 | |
152 | static int rdma_nl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, |
153 | struct netlink_ext_ack *extack) |
154 | { |
155 | int type = nlh->nlmsg_type; |
156 | unsigned int index = RDMA_NL_GET_CLIENT(type); |
157 | unsigned int op = RDMA_NL_GET_OP(type); |
158 | const struct rdma_nl_cbs *cb_table; |
159 | int err = -EINVAL; |
160 | |
161 | if (!is_nl_msg_valid(type: index, op)) |
162 | return -EINVAL; |
163 | |
164 | down_read(sem: &rdma_nl_types[index].sem); |
165 | cb_table = get_cb_table(skb, type: index, op); |
166 | if (!cb_table) |
167 | goto done; |
168 | |
169 | if ((cb_table[op].flags & RDMA_NL_ADMIN_PERM) && |
170 | !netlink_capable(skb, CAP_NET_ADMIN)) { |
171 | err = -EPERM; |
172 | goto done; |
173 | } |
174 | |
175 | /* |
176 | * LS responses overload the 0x100 (NLM_F_ROOT) flag. Don't |
177 | * mistakenly call the .dump() function. |
178 | */ |
179 | if (index == RDMA_NL_LS) { |
180 | if (cb_table[op].doit) |
181 | err = cb_table[op].doit(skb, nlh, extack); |
182 | goto done; |
183 | } |
184 | /* FIXME: Convert IWCM to properly handle doit callbacks */ |
185 | if ((nlh->nlmsg_flags & NLM_F_DUMP) || index == RDMA_NL_IWCM) { |
186 | struct netlink_dump_control c = { |
187 | .dump = cb_table[op].dump, |
188 | }; |
189 | if (c.dump) |
190 | err = netlink_dump_start(ssk: skb->sk, skb, nlh, control: &c); |
191 | goto done; |
192 | } |
193 | |
194 | if (cb_table[op].doit) |
195 | err = cb_table[op].doit(skb, nlh, extack); |
196 | done: |
197 | up_read(sem: &rdma_nl_types[index].sem); |
198 | return err; |
199 | } |
200 | |
201 | /* |
202 | * This function is similar to netlink_rcv_skb with one exception: |
203 | * It calls to the callback for the netlink messages without NLM_F_REQUEST |
204 | * flag. These messages are intended for RDMA_NL_LS consumer, so it is allowed |
205 | * for that consumer only. |
206 | */ |
207 | static int rdma_nl_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, |
208 | struct nlmsghdr *, |
209 | struct netlink_ext_ack *)) |
210 | { |
211 | struct netlink_ext_ack extack = {}; |
212 | struct nlmsghdr *nlh; |
213 | int err; |
214 | |
215 | while (skb->len >= nlmsg_total_size(payload: 0)) { |
216 | int msglen; |
217 | |
218 | nlh = nlmsg_hdr(skb); |
219 | err = 0; |
220 | |
221 | if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) |
222 | return 0; |
223 | |
224 | /* |
225 | * Generally speaking, the only requests are handled |
226 | * by the kernel, but RDMA_NL_LS is different, because it |
227 | * runs backward netlink scheme. Kernel initiates messages |
228 | * and waits for reply with data to keep pathrecord cache |
229 | * in sync. |
230 | */ |
231 | if (!(nlh->nlmsg_flags & NLM_F_REQUEST) && |
232 | (RDMA_NL_GET_CLIENT(nlh->nlmsg_type) != RDMA_NL_LS)) |
233 | goto ack; |
234 | |
235 | /* Skip control messages */ |
236 | if (nlh->nlmsg_type < NLMSG_MIN_TYPE) |
237 | goto ack; |
238 | |
239 | err = cb(skb, nlh, &extack); |
240 | if (err == -EINTR) |
241 | goto skip; |
242 | |
243 | ack: |
244 | if (nlh->nlmsg_flags & NLM_F_ACK || err) |
245 | netlink_ack(in_skb: skb, nlh, err, extack: &extack); |
246 | |
247 | skip: |
248 | msglen = NLMSG_ALIGN(nlh->nlmsg_len); |
249 | if (msglen > skb->len) |
250 | msglen = skb->len; |
251 | skb_pull(skb, len: msglen); |
252 | } |
253 | |
254 | return 0; |
255 | } |
256 | |
257 | static void rdma_nl_rcv(struct sk_buff *skb) |
258 | { |
259 | rdma_nl_rcv_skb(skb, cb: &rdma_nl_rcv_msg); |
260 | } |
261 | |
262 | int rdma_nl_unicast(struct net *net, struct sk_buff *skb, u32 pid) |
263 | { |
264 | struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); |
265 | int err; |
266 | |
267 | err = netlink_unicast(ssk: rnet->nl_sock, skb, portid: pid, MSG_DONTWAIT); |
268 | return (err < 0) ? err : 0; |
269 | } |
270 | EXPORT_SYMBOL(rdma_nl_unicast); |
271 | |
272 | int rdma_nl_unicast_wait(struct net *net, struct sk_buff *skb, __u32 pid) |
273 | { |
274 | struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); |
275 | int err; |
276 | |
277 | err = netlink_unicast(ssk: rnet->nl_sock, skb, portid: pid, nonblock: 0); |
278 | return (err < 0) ? err : 0; |
279 | } |
280 | EXPORT_SYMBOL(rdma_nl_unicast_wait); |
281 | |
282 | int rdma_nl_multicast(struct net *net, struct sk_buff *skb, |
283 | unsigned int group, gfp_t flags) |
284 | { |
285 | struct rdma_dev_net *rnet = rdma_net_to_dev_net(net); |
286 | |
287 | return nlmsg_multicast(sk: rnet->nl_sock, skb, portid: 0, group, flags); |
288 | } |
289 | EXPORT_SYMBOL(rdma_nl_multicast); |
290 | |
291 | void rdma_nl_init(void) |
292 | { |
293 | int idx; |
294 | |
295 | for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) |
296 | init_rwsem(&rdma_nl_types[idx].sem); |
297 | } |
298 | |
299 | void rdma_nl_exit(void) |
300 | { |
301 | int idx; |
302 | |
303 | for (idx = 0; idx < RDMA_NL_NUM_CLIENTS; idx++) |
304 | WARN(rdma_nl_types[idx].cb_table, |
305 | "Netlink client %d wasn't released prior to unloading %s\n" , |
306 | idx, KBUILD_MODNAME); |
307 | } |
308 | |
309 | int rdma_nl_net_init(struct rdma_dev_net *rnet) |
310 | { |
311 | struct net *net = read_pnet(pnet: &rnet->net); |
312 | struct netlink_kernel_cfg cfg = { |
313 | .input = rdma_nl_rcv, |
314 | }; |
315 | struct sock *nls; |
316 | |
317 | nls = netlink_kernel_create(net, NETLINK_RDMA, cfg: &cfg); |
318 | if (!nls) |
319 | return -ENOMEM; |
320 | |
321 | nls->sk_sndtimeo = 10 * HZ; |
322 | rnet->nl_sock = nls; |
323 | return 0; |
324 | } |
325 | |
326 | void rdma_nl_net_exit(struct rdma_dev_net *rnet) |
327 | { |
328 | netlink_kernel_release(sk: rnet->nl_sock); |
329 | } |
330 | |
/*
 * NOTE(review): presumably registers a module alias so that opening a
 * PF_NETLINK socket with protocol NETLINK_RDMA can auto-load this module;
 * confirm against the MODULE_ALIAS_NET_PF_PROTO definition.
 */
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_RDMA);
332 | |