1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Shared Memory Communications over RDMA (SMC-R) and RoCE |
4 | * |
5 | * Basic Transport Functions exploiting Infiniband API |
6 | * |
7 | * Copyright IBM Corp. 2016 |
8 | * |
9 | * Author(s): Ursula Braun <ubraun@linux.vnet.ibm.com> |
10 | */ |
11 | |
12 | #include <linux/socket.h> |
13 | #include <linux/if_vlan.h> |
14 | #include <linux/random.h> |
15 | #include <linux/workqueue.h> |
16 | #include <linux/wait.h> |
17 | #include <linux/reboot.h> |
18 | #include <linux/mutex.h> |
19 | #include <linux/list.h> |
20 | #include <linux/smc.h> |
21 | #include <net/tcp.h> |
22 | #include <net/sock.h> |
23 | #include <rdma/ib_verbs.h> |
24 | #include <rdma/ib_cache.h> |
25 | |
26 | #include "smc.h" |
27 | #include "smc_clc.h" |
28 | #include "smc_core.h" |
29 | #include "smc_ib.h" |
30 | #include "smc_wr.h" |
31 | #include "smc_llc.h" |
32 | #include "smc_cdc.h" |
33 | #include "smc_close.h" |
34 | #include "smc_ism.h" |
35 | #include "smc_netlink.h" |
36 | #include "smc_stats.h" |
37 | #include "smc_tracepoint.h" |
38 | |
39 | #define SMC_LGR_NUM_INCR 256 |
40 | #define SMC_LGR_FREE_DELAY_SERV (600 * HZ) |
41 | #define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10 * HZ) |
42 | |
43 | struct smc_lgr_list smc_lgr_list = { /* established link groups */ |
44 | .lock = __SPIN_LOCK_UNLOCKED(smc_lgr_list.lock), |
45 | .list = LIST_HEAD_INIT(smc_lgr_list.list), |
46 | .num = 0, |
47 | }; |
48 | |
49 | static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */ |
50 | static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted); |
51 | |
52 | static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, |
53 | struct smc_buf_desc *buf_desc); |
54 | static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft); |
55 | |
56 | static void smc_link_down_work(struct work_struct *work); |
57 | |
58 | /* return head of link group list and its lock for a given link group */ |
59 | static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr, |
60 | spinlock_t **lgr_lock) |
61 | { |
62 | if (lgr->is_smcd) { |
63 | *lgr_lock = &lgr->smcd->lgr_lock; |
64 | return &lgr->smcd->lgr_list; |
65 | } |
66 | |
67 | *lgr_lock = &smc_lgr_list.lock; |
68 | return &smc_lgr_list.list; |
69 | } |
70 | |
71 | static void smc_ibdev_cnt_inc(struct smc_link *lnk) |
72 | { |
73 | atomic_inc(v: &lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]); |
74 | } |
75 | |
76 | static void smc_ibdev_cnt_dec(struct smc_link *lnk) |
77 | { |
78 | atomic_dec(v: &lnk->smcibdev->lnk_cnt_by_port[lnk->ibport - 1]); |
79 | } |
80 | |
81 | static void smc_lgr_schedule_free_work(struct smc_link_group *lgr) |
82 | { |
83 | /* client link group creation always follows the server link group |
84 | * creation. For client use a somewhat higher removal delay time, |
85 | * otherwise there is a risk of out-of-sync link groups. |
86 | */ |
87 | if (!lgr->freeing) { |
88 | mod_delayed_work(wq: system_wq, dwork: &lgr->free_work, |
89 | delay: (!lgr->is_smcd && lgr->role == SMC_CLNT) ? |
90 | SMC_LGR_FREE_DELAY_CLNT : |
91 | SMC_LGR_FREE_DELAY_SERV); |
92 | } |
93 | } |
94 | |
95 | /* Register connection's alert token in our lookup structure. |
96 | * To use rbtrees we have to implement our own insert core. |
97 | * Requires @conns_lock |
98 | * @smc connection to register |
99 | * Returns 0 on success, != otherwise. |
100 | */ |
101 | static void smc_lgr_add_alert_token(struct smc_connection *conn) |
102 | { |
103 | struct rb_node **link, *parent = NULL; |
104 | u32 token = conn->alert_token_local; |
105 | |
106 | link = &conn->lgr->conns_all.rb_node; |
107 | while (*link) { |
108 | struct smc_connection *cur = rb_entry(*link, |
109 | struct smc_connection, alert_node); |
110 | |
111 | parent = *link; |
112 | if (cur->alert_token_local > token) |
113 | link = &parent->rb_left; |
114 | else |
115 | link = &parent->rb_right; |
116 | } |
117 | /* Put the new node there */ |
118 | rb_link_node(node: &conn->alert_node, parent, rb_link: link); |
119 | rb_insert_color(&conn->alert_node, &conn->lgr->conns_all); |
120 | } |
121 | |
122 | /* assign an SMC-R link to the connection */ |
123 | static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first) |
124 | { |
125 | enum smc_link_state expected = first ? SMC_LNK_ACTIVATING : |
126 | SMC_LNK_ACTIVE; |
127 | int i, j; |
128 | |
129 | /* do link balancing */ |
130 | conn->lnk = NULL; /* reset conn->lnk first */ |
131 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
132 | struct smc_link *lnk = &conn->lgr->lnk[i]; |
133 | |
134 | if (lnk->state != expected || lnk->link_is_asym) |
135 | continue; |
136 | if (conn->lgr->role == SMC_CLNT) { |
137 | conn->lnk = lnk; /* temporary, SMC server assigns link*/ |
138 | break; |
139 | } |
140 | if (conn->lgr->conns_num % 2) { |
141 | for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) { |
142 | struct smc_link *lnk2; |
143 | |
144 | lnk2 = &conn->lgr->lnk[j]; |
145 | if (lnk2->state == expected && |
146 | !lnk2->link_is_asym) { |
147 | conn->lnk = lnk2; |
148 | break; |
149 | } |
150 | } |
151 | } |
152 | if (!conn->lnk) |
153 | conn->lnk = lnk; |
154 | break; |
155 | } |
156 | if (!conn->lnk) |
157 | return SMC_CLC_DECL_NOACTLINK; |
158 | atomic_inc(v: &conn->lnk->conn_cnt); |
159 | return 0; |
160 | } |
161 | |
162 | /* Register connection in link group by assigning an alert token |
163 | * registered in a search tree. |
164 | * Requires @conns_lock |
165 | * Note that '0' is a reserved value and not assigned. |
166 | */ |
167 | static int smc_lgr_register_conn(struct smc_connection *conn, bool first) |
168 | { |
169 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
170 | static atomic_t nexttoken = ATOMIC_INIT(0); |
171 | int rc; |
172 | |
173 | if (!conn->lgr->is_smcd) { |
174 | rc = smcr_lgr_conn_assign_link(conn, first); |
175 | if (rc) { |
176 | conn->lgr = NULL; |
177 | return rc; |
178 | } |
179 | } |
180 | /* find a new alert_token_local value not yet used by some connection |
181 | * in this link group |
182 | */ |
183 | sock_hold(sk: &smc->sk); /* sock_put in smc_lgr_unregister_conn() */ |
184 | while (!conn->alert_token_local) { |
185 | conn->alert_token_local = atomic_inc_return(v: &nexttoken); |
186 | if (smc_lgr_find_conn(token: conn->alert_token_local, lgr: conn->lgr)) |
187 | conn->alert_token_local = 0; |
188 | } |
189 | smc_lgr_add_alert_token(conn); |
190 | conn->lgr->conns_num++; |
191 | return 0; |
192 | } |
193 | |
194 | /* Unregister connection and reset the alert token of the given connection< |
195 | */ |
196 | static void __smc_lgr_unregister_conn(struct smc_connection *conn) |
197 | { |
198 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
199 | struct smc_link_group *lgr = conn->lgr; |
200 | |
201 | rb_erase(&conn->alert_node, &lgr->conns_all); |
202 | if (conn->lnk) |
203 | atomic_dec(v: &conn->lnk->conn_cnt); |
204 | lgr->conns_num--; |
205 | conn->alert_token_local = 0; |
206 | sock_put(sk: &smc->sk); /* sock_hold in smc_lgr_register_conn() */ |
207 | } |
208 | |
209 | /* Unregister connection from lgr |
210 | */ |
211 | static void smc_lgr_unregister_conn(struct smc_connection *conn) |
212 | { |
213 | struct smc_link_group *lgr = conn->lgr; |
214 | |
215 | if (!smc_conn_lgr_valid(conn)) |
216 | return; |
217 | write_lock_bh(&lgr->conns_lock); |
218 | if (conn->alert_token_local) { |
219 | __smc_lgr_unregister_conn(conn); |
220 | } |
221 | write_unlock_bh(&lgr->conns_lock); |
222 | } |
223 | |
224 | int smc_nl_get_sys_info(struct sk_buff *skb, struct netlink_callback *cb) |
225 | { |
226 | struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(c: cb); |
227 | char hostname[SMC_MAX_HOSTNAME_LEN + 1]; |
228 | char smc_seid[SMC_MAX_EID_LEN + 1]; |
229 | struct nlattr *attrs; |
230 | u8 *seid = NULL; |
231 | u8 *host = NULL; |
232 | void *nlh; |
233 | |
234 | nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, seq: cb->nlh->nlmsg_seq, |
235 | family: &smc_gen_nl_family, NLM_F_MULTI, |
236 | cmd: SMC_NETLINK_GET_SYS_INFO); |
237 | if (!nlh) |
238 | goto errmsg; |
239 | if (cb_ctx->pos[0]) |
240 | goto errout; |
241 | attrs = nla_nest_start(skb, attrtype: SMC_GEN_SYS_INFO); |
242 | if (!attrs) |
243 | goto errout; |
244 | if (nla_put_u8(skb, attrtype: SMC_NLA_SYS_VER, SMC_V2)) |
245 | goto errattr; |
246 | if (nla_put_u8(skb, attrtype: SMC_NLA_SYS_REL, SMC_RELEASE)) |
247 | goto errattr; |
248 | if (nla_put_u8(skb, attrtype: SMC_NLA_SYS_IS_ISM_V2, value: smc_ism_is_v2_capable())) |
249 | goto errattr; |
250 | if (nla_put_u8(skb, attrtype: SMC_NLA_SYS_IS_SMCR_V2, value: true)) |
251 | goto errattr; |
252 | smc_clc_get_hostname(host: &host); |
253 | if (host) { |
254 | memcpy(hostname, host, SMC_MAX_HOSTNAME_LEN); |
255 | hostname[SMC_MAX_HOSTNAME_LEN] = 0; |
256 | if (nla_put_string(skb, attrtype: SMC_NLA_SYS_LOCAL_HOST, str: hostname)) |
257 | goto errattr; |
258 | } |
259 | if (smc_ism_is_v2_capable()) { |
260 | smc_ism_get_system_eid(eid: &seid); |
261 | memcpy(smc_seid, seid, SMC_MAX_EID_LEN); |
262 | smc_seid[SMC_MAX_EID_LEN] = 0; |
263 | if (nla_put_string(skb, attrtype: SMC_NLA_SYS_SEID, str: smc_seid)) |
264 | goto errattr; |
265 | } |
266 | nla_nest_end(skb, start: attrs); |
267 | genlmsg_end(skb, hdr: nlh); |
268 | cb_ctx->pos[0] = 1; |
269 | return skb->len; |
270 | |
271 | errattr: |
272 | nla_nest_cancel(skb, start: attrs); |
273 | errout: |
274 | genlmsg_cancel(skb, hdr: nlh); |
275 | errmsg: |
276 | return skb->len; |
277 | } |
278 | |
279 | /* Fill SMC_NLA_LGR_D_V2_COMMON/SMC_NLA_LGR_R_V2_COMMON nested attributes */ |
280 | static int smc_nl_fill_lgr_v2_common(struct smc_link_group *lgr, |
281 | struct sk_buff *skb, |
282 | struct netlink_callback *cb, |
283 | struct nlattr *v2_attrs) |
284 | { |
285 | char smc_host[SMC_MAX_HOSTNAME_LEN + 1]; |
286 | char smc_eid[SMC_MAX_EID_LEN + 1]; |
287 | |
288 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_V2_VER, value: lgr->smc_version)) |
289 | goto errv2attr; |
290 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_V2_REL, value: lgr->peer_smc_release)) |
291 | goto errv2attr; |
292 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_V2_OS, value: lgr->peer_os)) |
293 | goto errv2attr; |
294 | memcpy(smc_host, lgr->peer_hostname, SMC_MAX_HOSTNAME_LEN); |
295 | smc_host[SMC_MAX_HOSTNAME_LEN] = 0; |
296 | if (nla_put_string(skb, attrtype: SMC_NLA_LGR_V2_PEER_HOST, str: smc_host)) |
297 | goto errv2attr; |
298 | memcpy(smc_eid, lgr->negotiated_eid, SMC_MAX_EID_LEN); |
299 | smc_eid[SMC_MAX_EID_LEN] = 0; |
300 | if (nla_put_string(skb, attrtype: SMC_NLA_LGR_V2_NEG_EID, str: smc_eid)) |
301 | goto errv2attr; |
302 | |
303 | nla_nest_end(skb, start: v2_attrs); |
304 | return 0; |
305 | |
306 | errv2attr: |
307 | nla_nest_cancel(skb, start: v2_attrs); |
308 | return -EMSGSIZE; |
309 | } |
310 | |
311 | static int smc_nl_fill_smcr_lgr_v2(struct smc_link_group *lgr, |
312 | struct sk_buff *skb, |
313 | struct netlink_callback *cb) |
314 | { |
315 | struct nlattr *v2_attrs; |
316 | |
317 | v2_attrs = nla_nest_start(skb, attrtype: SMC_NLA_LGR_R_V2); |
318 | if (!v2_attrs) |
319 | goto errattr; |
320 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_V2_DIRECT, value: !lgr->uses_gateway)) |
321 | goto errv2attr; |
322 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_V2_MAX_CONNS, value: lgr->max_conns)) |
323 | goto errv2attr; |
324 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_V2_MAX_LINKS, value: lgr->max_links)) |
325 | goto errv2attr; |
326 | |
327 | nla_nest_end(skb, start: v2_attrs); |
328 | return 0; |
329 | |
330 | errv2attr: |
331 | nla_nest_cancel(skb, start: v2_attrs); |
332 | errattr: |
333 | return -EMSGSIZE; |
334 | } |
335 | |
336 | static int smc_nl_fill_lgr(struct smc_link_group *lgr, |
337 | struct sk_buff *skb, |
338 | struct netlink_callback *cb) |
339 | { |
340 | char smc_target[SMC_MAX_PNETID_LEN + 1]; |
341 | struct nlattr *attrs, *v2_attrs; |
342 | |
343 | attrs = nla_nest_start(skb, attrtype: SMC_GEN_LGR_SMCR); |
344 | if (!attrs) |
345 | goto errout; |
346 | |
347 | if (nla_put_u32(skb, attrtype: SMC_NLA_LGR_R_ID, value: *((u32 *)&lgr->id))) |
348 | goto errattr; |
349 | if (nla_put_u32(skb, attrtype: SMC_NLA_LGR_R_CONNS_NUM, value: lgr->conns_num)) |
350 | goto errattr; |
351 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_ROLE, value: lgr->role)) |
352 | goto errattr; |
353 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_TYPE, value: lgr->type)) |
354 | goto errattr; |
355 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_BUF_TYPE, value: lgr->buf_type)) |
356 | goto errattr; |
357 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_R_VLAN_ID, value: lgr->vlan_id)) |
358 | goto errattr; |
359 | if (nla_put_u64_64bit(skb, attrtype: SMC_NLA_LGR_R_NET_COOKIE, |
360 | value: lgr->net->net_cookie, padattr: SMC_NLA_LGR_R_PAD)) |
361 | goto errattr; |
362 | memcpy(smc_target, lgr->pnet_id, SMC_MAX_PNETID_LEN); |
363 | smc_target[SMC_MAX_PNETID_LEN] = 0; |
364 | if (nla_put_string(skb, attrtype: SMC_NLA_LGR_R_PNETID, str: smc_target)) |
365 | goto errattr; |
366 | if (lgr->smc_version > SMC_V1) { |
367 | v2_attrs = nla_nest_start(skb, attrtype: SMC_NLA_LGR_R_V2_COMMON); |
368 | if (!v2_attrs) |
369 | goto errattr; |
370 | if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs)) |
371 | goto errattr; |
372 | if (smc_nl_fill_smcr_lgr_v2(lgr, skb, cb)) |
373 | goto errattr; |
374 | } |
375 | |
376 | nla_nest_end(skb, start: attrs); |
377 | return 0; |
378 | errattr: |
379 | nla_nest_cancel(skb, start: attrs); |
380 | errout: |
381 | return -EMSGSIZE; |
382 | } |
383 | |
384 | static int smc_nl_fill_lgr_link(struct smc_link_group *lgr, |
385 | struct smc_link *link, |
386 | struct sk_buff *skb, |
387 | struct netlink_callback *cb) |
388 | { |
389 | char smc_ibname[IB_DEVICE_NAME_MAX]; |
390 | u8 smc_gid_target[41]; |
391 | struct nlattr *attrs; |
392 | u32 link_uid = 0; |
393 | void *nlh; |
394 | |
395 | nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, seq: cb->nlh->nlmsg_seq, |
396 | family: &smc_gen_nl_family, NLM_F_MULTI, |
397 | cmd: SMC_NETLINK_GET_LINK_SMCR); |
398 | if (!nlh) |
399 | goto errmsg; |
400 | |
401 | attrs = nla_nest_start(skb, attrtype: SMC_GEN_LINK_SMCR); |
402 | if (!attrs) |
403 | goto errout; |
404 | |
405 | if (nla_put_u8(skb, attrtype: SMC_NLA_LINK_ID, value: link->link_id)) |
406 | goto errattr; |
407 | if (nla_put_u32(skb, attrtype: SMC_NLA_LINK_STATE, value: link->state)) |
408 | goto errattr; |
409 | if (nla_put_u32(skb, attrtype: SMC_NLA_LINK_CONN_CNT, |
410 | value: atomic_read(v: &link->conn_cnt))) |
411 | goto errattr; |
412 | if (nla_put_u8(skb, attrtype: SMC_NLA_LINK_IB_PORT, value: link->ibport)) |
413 | goto errattr; |
414 | if (nla_put_u32(skb, attrtype: SMC_NLA_LINK_NET_DEV, value: link->ndev_ifidx)) |
415 | goto errattr; |
416 | snprintf(buf: smc_ibname, size: sizeof(smc_ibname), fmt: "%s" , link->ibname); |
417 | if (nla_put_string(skb, attrtype: SMC_NLA_LINK_IB_DEV, str: smc_ibname)) |
418 | goto errattr; |
419 | memcpy(&link_uid, link->link_uid, sizeof(link_uid)); |
420 | if (nla_put_u32(skb, attrtype: SMC_NLA_LINK_UID, value: link_uid)) |
421 | goto errattr; |
422 | memcpy(&link_uid, link->peer_link_uid, sizeof(link_uid)); |
423 | if (nla_put_u32(skb, attrtype: SMC_NLA_LINK_PEER_UID, value: link_uid)) |
424 | goto errattr; |
425 | memset(smc_gid_target, 0, sizeof(smc_gid_target)); |
426 | smc_gid_be16_convert(buf: smc_gid_target, gid_raw: link->gid); |
427 | if (nla_put_string(skb, attrtype: SMC_NLA_LINK_GID, str: smc_gid_target)) |
428 | goto errattr; |
429 | memset(smc_gid_target, 0, sizeof(smc_gid_target)); |
430 | smc_gid_be16_convert(buf: smc_gid_target, gid_raw: link->peer_gid); |
431 | if (nla_put_string(skb, attrtype: SMC_NLA_LINK_PEER_GID, str: smc_gid_target)) |
432 | goto errattr; |
433 | |
434 | nla_nest_end(skb, start: attrs); |
435 | genlmsg_end(skb, hdr: nlh); |
436 | return 0; |
437 | errattr: |
438 | nla_nest_cancel(skb, start: attrs); |
439 | errout: |
440 | genlmsg_cancel(skb, hdr: nlh); |
441 | errmsg: |
442 | return -EMSGSIZE; |
443 | } |
444 | |
445 | static int smc_nl_handle_lgr(struct smc_link_group *lgr, |
446 | struct sk_buff *skb, |
447 | struct netlink_callback *cb, |
448 | bool list_links) |
449 | { |
450 | void *nlh; |
451 | int i; |
452 | |
453 | nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, seq: cb->nlh->nlmsg_seq, |
454 | family: &smc_gen_nl_family, NLM_F_MULTI, |
455 | cmd: SMC_NETLINK_GET_LGR_SMCR); |
456 | if (!nlh) |
457 | goto errmsg; |
458 | if (smc_nl_fill_lgr(lgr, skb, cb)) |
459 | goto errout; |
460 | |
461 | genlmsg_end(skb, hdr: nlh); |
462 | if (!list_links) |
463 | goto out; |
464 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
465 | if (!smc_link_usable(lnk: &lgr->lnk[i])) |
466 | continue; |
467 | if (smc_nl_fill_lgr_link(lgr, link: &lgr->lnk[i], skb, cb)) |
468 | goto errout; |
469 | } |
470 | out: |
471 | return 0; |
472 | |
473 | errout: |
474 | genlmsg_cancel(skb, hdr: nlh); |
475 | errmsg: |
476 | return -EMSGSIZE; |
477 | } |
478 | |
479 | static void smc_nl_fill_lgr_list(struct smc_lgr_list *smc_lgr, |
480 | struct sk_buff *skb, |
481 | struct netlink_callback *cb, |
482 | bool list_links) |
483 | { |
484 | struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(c: cb); |
485 | struct smc_link_group *lgr; |
486 | int snum = cb_ctx->pos[0]; |
487 | int num = 0; |
488 | |
489 | spin_lock_bh(lock: &smc_lgr->lock); |
490 | list_for_each_entry(lgr, &smc_lgr->list, list) { |
491 | if (num < snum) |
492 | goto next; |
493 | if (smc_nl_handle_lgr(lgr, skb, cb, list_links)) |
494 | goto errout; |
495 | next: |
496 | num++; |
497 | } |
498 | errout: |
499 | spin_unlock_bh(lock: &smc_lgr->lock); |
500 | cb_ctx->pos[0] = num; |
501 | } |
502 | |
503 | static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr, |
504 | struct sk_buff *skb, |
505 | struct netlink_callback *cb) |
506 | { |
507 | char smc_pnet[SMC_MAX_PNETID_LEN + 1]; |
508 | struct smcd_dev *smcd = lgr->smcd; |
509 | struct smcd_gid smcd_gid; |
510 | struct nlattr *attrs; |
511 | void *nlh; |
512 | |
513 | nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, seq: cb->nlh->nlmsg_seq, |
514 | family: &smc_gen_nl_family, NLM_F_MULTI, |
515 | cmd: SMC_NETLINK_GET_LGR_SMCD); |
516 | if (!nlh) |
517 | goto errmsg; |
518 | |
519 | attrs = nla_nest_start(skb, attrtype: SMC_GEN_LGR_SMCD); |
520 | if (!attrs) |
521 | goto errout; |
522 | |
523 | if (nla_put_u32(skb, attrtype: SMC_NLA_LGR_D_ID, value: *((u32 *)&lgr->id))) |
524 | goto errattr; |
525 | smcd->ops->get_local_gid(smcd, &smcd_gid); |
526 | if (nla_put_u64_64bit(skb, attrtype: SMC_NLA_LGR_D_GID, |
527 | value: smcd_gid.gid, padattr: SMC_NLA_LGR_D_PAD)) |
528 | goto errattr; |
529 | if (nla_put_u64_64bit(skb, attrtype: SMC_NLA_LGR_D_EXT_GID, |
530 | value: smcd_gid.gid_ext, padattr: SMC_NLA_LGR_D_PAD)) |
531 | goto errattr; |
532 | if (nla_put_u64_64bit(skb, attrtype: SMC_NLA_LGR_D_PEER_GID, value: lgr->peer_gid.gid, |
533 | padattr: SMC_NLA_LGR_D_PAD)) |
534 | goto errattr; |
535 | if (nla_put_u64_64bit(skb, attrtype: SMC_NLA_LGR_D_PEER_EXT_GID, |
536 | value: lgr->peer_gid.gid_ext, padattr: SMC_NLA_LGR_D_PAD)) |
537 | goto errattr; |
538 | if (nla_put_u8(skb, attrtype: SMC_NLA_LGR_D_VLAN_ID, value: lgr->vlan_id)) |
539 | goto errattr; |
540 | if (nla_put_u32(skb, attrtype: SMC_NLA_LGR_D_CONNS_NUM, value: lgr->conns_num)) |
541 | goto errattr; |
542 | if (nla_put_u32(skb, attrtype: SMC_NLA_LGR_D_CHID, value: smc_ism_get_chid(dev: lgr->smcd))) |
543 | goto errattr; |
544 | memcpy(smc_pnet, lgr->smcd->pnetid, SMC_MAX_PNETID_LEN); |
545 | smc_pnet[SMC_MAX_PNETID_LEN] = 0; |
546 | if (nla_put_string(skb, attrtype: SMC_NLA_LGR_D_PNETID, str: smc_pnet)) |
547 | goto errattr; |
548 | if (lgr->smc_version > SMC_V1) { |
549 | struct nlattr *v2_attrs; |
550 | |
551 | v2_attrs = nla_nest_start(skb, attrtype: SMC_NLA_LGR_D_V2_COMMON); |
552 | if (!v2_attrs) |
553 | goto errattr; |
554 | if (smc_nl_fill_lgr_v2_common(lgr, skb, cb, v2_attrs)) |
555 | goto errattr; |
556 | } |
557 | nla_nest_end(skb, start: attrs); |
558 | genlmsg_end(skb, hdr: nlh); |
559 | return 0; |
560 | |
561 | errattr: |
562 | nla_nest_cancel(skb, start: attrs); |
563 | errout: |
564 | genlmsg_cancel(skb, hdr: nlh); |
565 | errmsg: |
566 | return -EMSGSIZE; |
567 | } |
568 | |
569 | static int smc_nl_handle_smcd_lgr(struct smcd_dev *dev, |
570 | struct sk_buff *skb, |
571 | struct netlink_callback *cb) |
572 | { |
573 | struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(c: cb); |
574 | struct smc_link_group *lgr; |
575 | int snum = cb_ctx->pos[1]; |
576 | int rc = 0, num = 0; |
577 | |
578 | spin_lock_bh(lock: &dev->lgr_lock); |
579 | list_for_each_entry(lgr, &dev->lgr_list, list) { |
580 | if (!lgr->is_smcd) |
581 | continue; |
582 | if (num < snum) |
583 | goto next; |
584 | rc = smc_nl_fill_smcd_lgr(lgr, skb, cb); |
585 | if (rc) |
586 | goto errout; |
587 | next: |
588 | num++; |
589 | } |
590 | errout: |
591 | spin_unlock_bh(lock: &dev->lgr_lock); |
592 | cb_ctx->pos[1] = num; |
593 | return rc; |
594 | } |
595 | |
596 | static int smc_nl_fill_smcd_dev(struct smcd_dev_list *dev_list, |
597 | struct sk_buff *skb, |
598 | struct netlink_callback *cb) |
599 | { |
600 | struct smc_nl_dmp_ctx *cb_ctx = smc_nl_dmp_ctx(c: cb); |
601 | struct smcd_dev *smcd_dev; |
602 | int snum = cb_ctx->pos[0]; |
603 | int rc = 0, num = 0; |
604 | |
605 | mutex_lock(&dev_list->mutex); |
606 | list_for_each_entry(smcd_dev, &dev_list->list, list) { |
607 | if (list_empty(head: &smcd_dev->lgr_list)) |
608 | continue; |
609 | if (num < snum) |
610 | goto next; |
611 | rc = smc_nl_handle_smcd_lgr(dev: smcd_dev, skb, cb); |
612 | if (rc) |
613 | goto errout; |
614 | next: |
615 | num++; |
616 | } |
617 | errout: |
618 | mutex_unlock(lock: &dev_list->mutex); |
619 | cb_ctx->pos[0] = num; |
620 | return rc; |
621 | } |
622 | |
623 | int smcr_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb) |
624 | { |
625 | bool list_links = false; |
626 | |
627 | smc_nl_fill_lgr_list(smc_lgr: &smc_lgr_list, skb, cb, list_links); |
628 | return skb->len; |
629 | } |
630 | |
631 | int smcr_nl_get_link(struct sk_buff *skb, struct netlink_callback *cb) |
632 | { |
633 | bool list_links = true; |
634 | |
635 | smc_nl_fill_lgr_list(smc_lgr: &smc_lgr_list, skb, cb, list_links); |
636 | return skb->len; |
637 | } |
638 | |
639 | int smcd_nl_get_lgr(struct sk_buff *skb, struct netlink_callback *cb) |
640 | { |
641 | smc_nl_fill_smcd_dev(dev_list: &smcd_dev_list, skb, cb); |
642 | return skb->len; |
643 | } |
644 | |
645 | void smc_lgr_cleanup_early(struct smc_link_group *lgr) |
646 | { |
647 | spinlock_t *lgr_lock; |
648 | |
649 | if (!lgr) |
650 | return; |
651 | |
652 | smc_lgr_list_head(lgr, lgr_lock: &lgr_lock); |
653 | spin_lock_bh(lock: lgr_lock); |
654 | /* do not use this link group for new connections */ |
655 | if (!list_empty(head: &lgr->list)) |
656 | list_del_init(entry: &lgr->list); |
657 | spin_unlock_bh(lock: lgr_lock); |
658 | __smc_lgr_terminate(lgr, soft: true); |
659 | } |
660 | |
661 | static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr) |
662 | { |
663 | int i; |
664 | |
665 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
666 | struct smc_link *lnk = &lgr->lnk[i]; |
667 | |
668 | if (smc_link_sendable(lnk)) |
669 | lnk->state = SMC_LNK_INACTIVE; |
670 | } |
671 | wake_up_all(&lgr->llc_msg_waiter); |
672 | wake_up_all(&lgr->llc_flow_waiter); |
673 | } |
674 | |
675 | static void smc_lgr_free(struct smc_link_group *lgr); |
676 | |
677 | static void smc_lgr_free_work(struct work_struct *work) |
678 | { |
679 | struct smc_link_group *lgr = container_of(to_delayed_work(work), |
680 | struct smc_link_group, |
681 | free_work); |
682 | spinlock_t *lgr_lock; |
683 | bool conns; |
684 | |
685 | smc_lgr_list_head(lgr, lgr_lock: &lgr_lock); |
686 | spin_lock_bh(lock: lgr_lock); |
687 | if (lgr->freeing) { |
688 | spin_unlock_bh(lock: lgr_lock); |
689 | return; |
690 | } |
691 | read_lock_bh(&lgr->conns_lock); |
692 | conns = RB_EMPTY_ROOT(&lgr->conns_all); |
693 | read_unlock_bh(&lgr->conns_lock); |
694 | if (!conns) { /* number of lgr connections is no longer zero */ |
695 | spin_unlock_bh(lock: lgr_lock); |
696 | return; |
697 | } |
698 | list_del_init(entry: &lgr->list); /* remove from smc_lgr_list */ |
699 | lgr->freeing = 1; /* this instance does the freeing, no new schedule */ |
700 | spin_unlock_bh(lock: lgr_lock); |
701 | cancel_delayed_work(dwork: &lgr->free_work); |
702 | |
703 | if (!lgr->is_smcd && !lgr->terminating) |
704 | smc_llc_send_link_delete_all(lgr, ord: true, |
705 | SMC_LLC_DEL_PROG_INIT_TERM); |
706 | if (lgr->is_smcd && !lgr->terminating) |
707 | smc_ism_signal_shutdown(lgr); |
708 | if (!lgr->is_smcd) |
709 | smcr_lgr_link_deactivate_all(lgr); |
710 | smc_lgr_free(lgr); |
711 | } |
712 | |
713 | static void smc_lgr_terminate_work(struct work_struct *work) |
714 | { |
715 | struct smc_link_group *lgr = container_of(work, struct smc_link_group, |
716 | terminate_work); |
717 | |
718 | __smc_lgr_terminate(lgr, soft: true); |
719 | } |
720 | |
721 | /* return next unique link id for the lgr */ |
722 | static u8 smcr_next_link_id(struct smc_link_group *lgr) |
723 | { |
724 | u8 link_id; |
725 | int i; |
726 | |
727 | while (1) { |
728 | again: |
729 | link_id = ++lgr->next_link_id; |
730 | if (!link_id) /* skip zero as link_id */ |
731 | link_id = ++lgr->next_link_id; |
732 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
733 | if (smc_link_usable(lnk: &lgr->lnk[i]) && |
734 | lgr->lnk[i].link_id == link_id) |
735 | goto again; |
736 | } |
737 | break; |
738 | } |
739 | return link_id; |
740 | } |
741 | |
742 | static void smcr_copy_dev_info_to_link(struct smc_link *link) |
743 | { |
744 | struct smc_ib_device *smcibdev = link->smcibdev; |
745 | |
746 | snprintf(buf: link->ibname, size: sizeof(link->ibname), fmt: "%s" , |
747 | smcibdev->ibdev->name); |
748 | link->ndev_ifidx = smcibdev->ndev_ifidx[link->ibport - 1]; |
749 | } |
750 | |
751 | int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, |
752 | u8 link_idx, struct smc_init_info *ini) |
753 | { |
754 | struct smc_ib_device *smcibdev; |
755 | u8 rndvec[3]; |
756 | int rc; |
757 | |
758 | if (lgr->smc_version == SMC_V2) { |
759 | lnk->smcibdev = ini->smcrv2.ib_dev_v2; |
760 | lnk->ibport = ini->smcrv2.ib_port_v2; |
761 | } else { |
762 | lnk->smcibdev = ini->ib_dev; |
763 | lnk->ibport = ini->ib_port; |
764 | } |
765 | get_device(dev: &lnk->smcibdev->ibdev->dev); |
766 | atomic_inc(v: &lnk->smcibdev->lnk_cnt); |
767 | refcount_set(r: &lnk->refcnt, n: 1); /* link refcnt is set to 1 */ |
768 | lnk->clearing = 0; |
769 | lnk->path_mtu = lnk->smcibdev->pattr[lnk->ibport - 1].active_mtu; |
770 | lnk->link_id = smcr_next_link_id(lgr); |
771 | lnk->lgr = lgr; |
772 | smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ |
773 | lnk->link_idx = link_idx; |
774 | lnk->wr_rx_id_compl = 0; |
775 | smc_ibdev_cnt_inc(lnk); |
776 | smcr_copy_dev_info_to_link(link: lnk); |
777 | atomic_set(v: &lnk->conn_cnt, i: 0); |
778 | smc_llc_link_set_uid(link: lnk); |
779 | INIT_WORK(&lnk->link_down_wrk, smc_link_down_work); |
780 | if (!lnk->smcibdev->initialized) { |
781 | rc = (int)smc_ib_setup_per_ibdev(smcibdev: lnk->smcibdev); |
782 | if (rc) |
783 | goto out; |
784 | } |
785 | get_random_bytes(buf: rndvec, len: sizeof(rndvec)); |
786 | lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) + |
787 | (rndvec[2] << 16); |
788 | rc = smc_ib_determine_gid(smcibdev: lnk->smcibdev, ibport: lnk->ibport, |
789 | vlan_id: ini->vlan_id, gid: lnk->gid, sgid_index: &lnk->sgid_index, |
790 | smcrv2: lgr->smc_version == SMC_V2 ? |
791 | &ini->smcrv2 : NULL); |
792 | if (rc) |
793 | goto out; |
794 | rc = smc_llc_link_init(link: lnk); |
795 | if (rc) |
796 | goto out; |
797 | rc = smc_wr_alloc_link_mem(lnk); |
798 | if (rc) |
799 | goto clear_llc_lnk; |
800 | rc = smc_ib_create_protection_domain(lnk); |
801 | if (rc) |
802 | goto free_link_mem; |
803 | rc = smc_ib_create_queue_pair(lnk); |
804 | if (rc) |
805 | goto dealloc_pd; |
806 | rc = smc_wr_create_link(lnk); |
807 | if (rc) |
808 | goto destroy_qp; |
809 | lnk->state = SMC_LNK_ACTIVATING; |
810 | return 0; |
811 | |
812 | destroy_qp: |
813 | smc_ib_destroy_queue_pair(lnk); |
814 | dealloc_pd: |
815 | smc_ib_dealloc_protection_domain(lnk); |
816 | free_link_mem: |
817 | smc_wr_free_link_mem(lnk); |
818 | clear_llc_lnk: |
819 | smc_llc_link_clear(link: lnk, log: false); |
820 | out: |
821 | smc_ibdev_cnt_dec(lnk); |
822 | put_device(dev: &lnk->smcibdev->ibdev->dev); |
823 | smcibdev = lnk->smcibdev; |
824 | memset(lnk, 0, sizeof(struct smc_link)); |
825 | lnk->state = SMC_LNK_UNUSED; |
826 | if (!atomic_dec_return(v: &smcibdev->lnk_cnt)) |
827 | wake_up(&smcibdev->lnks_deleted); |
828 | smc_lgr_put(lgr); /* lgr_hold above */ |
829 | return rc; |
830 | } |
831 | |
832 | /* create a new SMC link group */ |
833 | static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) |
834 | { |
835 | struct smc_link_group *lgr; |
836 | struct list_head *lgr_list; |
837 | struct smcd_dev *smcd; |
838 | struct smc_link *lnk; |
839 | spinlock_t *lgr_lock; |
840 | u8 link_idx; |
841 | int rc = 0; |
842 | int i; |
843 | |
844 | if (ini->is_smcd && ini->vlan_id) { |
845 | if (smc_ism_get_vlan(dev: ini->ism_dev[ini->ism_selected], |
846 | vlan_id: ini->vlan_id)) { |
847 | rc = SMC_CLC_DECL_ISMVLANERR; |
848 | goto out; |
849 | } |
850 | } |
851 | |
852 | lgr = kzalloc(size: sizeof(*lgr), GFP_KERNEL); |
853 | if (!lgr) { |
854 | rc = SMC_CLC_DECL_MEM; |
855 | goto ism_put_vlan; |
856 | } |
857 | lgr->tx_wq = alloc_workqueue(fmt: "smc_tx_wq-%*phN" , flags: 0, max_active: 0, |
858 | SMC_LGR_ID_SIZE, &lgr->id); |
859 | if (!lgr->tx_wq) { |
860 | rc = -ENOMEM; |
861 | goto free_lgr; |
862 | } |
863 | lgr->is_smcd = ini->is_smcd; |
864 | lgr->sync_err = 0; |
865 | lgr->terminating = 0; |
866 | lgr->freeing = 0; |
867 | lgr->vlan_id = ini->vlan_id; |
868 | refcount_set(r: &lgr->refcnt, n: 1); /* set lgr refcnt to 1 */ |
869 | init_rwsem(&lgr->sndbufs_lock); |
870 | init_rwsem(&lgr->rmbs_lock); |
871 | rwlock_init(&lgr->conns_lock); |
872 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
873 | INIT_LIST_HEAD(list: &lgr->sndbufs[i]); |
874 | INIT_LIST_HEAD(list: &lgr->rmbs[i]); |
875 | } |
876 | lgr->next_link_id = 0; |
877 | smc_lgr_list.num += SMC_LGR_NUM_INCR; |
878 | memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE); |
879 | INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work); |
880 | INIT_WORK(&lgr->terminate_work, smc_lgr_terminate_work); |
881 | lgr->conns_all = RB_ROOT; |
882 | if (ini->is_smcd) { |
883 | /* SMC-D specific settings */ |
884 | smcd = ini->ism_dev[ini->ism_selected]; |
885 | get_device(dev: smcd->ops->get_dev(smcd)); |
886 | lgr->peer_gid.gid = |
887 | ini->ism_peer_gid[ini->ism_selected].gid; |
888 | lgr->peer_gid.gid_ext = |
889 | ini->ism_peer_gid[ini->ism_selected].gid_ext; |
890 | lgr->smcd = ini->ism_dev[ini->ism_selected]; |
891 | lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list; |
892 | lgr_lock = &lgr->smcd->lgr_lock; |
893 | lgr->smc_version = ini->smcd_version; |
894 | lgr->peer_shutdown = 0; |
895 | atomic_inc(v: &ini->ism_dev[ini->ism_selected]->lgr_cnt); |
896 | } else { |
897 | /* SMC-R specific settings */ |
898 | struct smc_ib_device *ibdev; |
899 | int ibport; |
900 | |
901 | lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT; |
902 | lgr->smc_version = ini->smcr_version; |
903 | memcpy(lgr->peer_systemid, ini->peer_systemid, |
904 | SMC_SYSTEMID_LEN); |
905 | if (lgr->smc_version == SMC_V2) { |
906 | ibdev = ini->smcrv2.ib_dev_v2; |
907 | ibport = ini->smcrv2.ib_port_v2; |
908 | lgr->saddr = ini->smcrv2.saddr; |
909 | lgr->uses_gateway = ini->smcrv2.uses_gateway; |
910 | memcpy(lgr->nexthop_mac, ini->smcrv2.nexthop_mac, |
911 | ETH_ALEN); |
912 | lgr->max_conns = ini->max_conns; |
913 | lgr->max_links = ini->max_links; |
914 | } else { |
915 | ibdev = ini->ib_dev; |
916 | ibport = ini->ib_port; |
917 | lgr->max_conns = SMC_CONN_PER_LGR_MAX; |
918 | lgr->max_links = SMC_LINKS_ADD_LNK_MAX; |
919 | } |
920 | memcpy(lgr->pnet_id, ibdev->pnetid[ibport - 1], |
921 | SMC_MAX_PNETID_LEN); |
922 | rc = smc_wr_alloc_lgr_mem(lgr); |
923 | if (rc) |
924 | goto free_wq; |
925 | smc_llc_lgr_init(lgr, smc); |
926 | |
927 | link_idx = SMC_SINGLE_LINK; |
928 | lnk = &lgr->lnk[link_idx]; |
929 | rc = smcr_link_init(lgr, lnk, link_idx, ini); |
930 | if (rc) { |
931 | smc_wr_free_lgr_mem(lgr); |
932 | goto free_wq; |
933 | } |
934 | lgr->net = smc_ib_net(smcibdev: lnk->smcibdev); |
935 | lgr_list = &smc_lgr_list.list; |
936 | lgr_lock = &smc_lgr_list.lock; |
937 | lgr->buf_type = lgr->net->smc.sysctl_smcr_buf_type; |
938 | atomic_inc(v: &lgr_cnt); |
939 | } |
940 | smc->conn.lgr = lgr; |
941 | spin_lock_bh(lock: lgr_lock); |
942 | list_add_tail(new: &lgr->list, head: lgr_list); |
943 | spin_unlock_bh(lock: lgr_lock); |
944 | return 0; |
945 | |
946 | free_wq: |
947 | destroy_workqueue(wq: lgr->tx_wq); |
948 | free_lgr: |
949 | kfree(objp: lgr); |
950 | ism_put_vlan: |
951 | if (ini->is_smcd && ini->vlan_id) |
952 | smc_ism_put_vlan(dev: ini->ism_dev[ini->ism_selected], vlan_id: ini->vlan_id); |
953 | out: |
954 | if (rc < 0) { |
955 | if (rc == -ENOMEM) |
956 | rc = SMC_CLC_DECL_MEM; |
957 | else |
958 | rc = SMC_CLC_DECL_INTERR; |
959 | } |
960 | return rc; |
961 | } |
962 | |
963 | static int smc_write_space(struct smc_connection *conn) |
964 | { |
965 | int buffer_len = conn->peer_rmbe_size; |
966 | union smc_host_cursor prod; |
967 | union smc_host_cursor cons; |
968 | int space; |
969 | |
970 | smc_curs_copy(tgt: &prod, src: &conn->local_tx_ctrl.prod, conn); |
971 | smc_curs_copy(tgt: &cons, src: &conn->local_rx_ctrl.cons, conn); |
972 | /* determine rx_buf space */ |
973 | space = buffer_len - smc_curs_diff(size: buffer_len, old: &cons, new: &prod); |
974 | return space; |
975 | } |
976 | |
977 | static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend, |
978 | struct smc_wr_buf *wr_buf) |
979 | { |
980 | struct smc_connection *conn = &smc->conn; |
981 | union smc_host_cursor cons, fin; |
982 | int rc = 0; |
983 | int diff; |
984 | |
985 | smc_curs_copy(tgt: &conn->tx_curs_sent, src: &conn->tx_curs_fin, conn); |
986 | smc_curs_copy(tgt: &fin, src: &conn->local_tx_ctrl_fin, conn); |
987 | /* set prod cursor to old state, enforce tx_rdma_writes() */ |
988 | smc_curs_copy(tgt: &conn->local_tx_ctrl.prod, src: &fin, conn); |
989 | smc_curs_copy(tgt: &cons, src: &conn->local_rx_ctrl.cons, conn); |
990 | |
991 | if (smc_curs_comp(size: conn->peer_rmbe_size, old: &cons, new: &fin) < 0) { |
992 | /* cons cursor advanced more than fin, and prod was set |
993 | * fin above, so now prod is smaller than cons. Fix that. |
994 | */ |
995 | diff = smc_curs_diff(size: conn->peer_rmbe_size, old: &fin, new: &cons); |
996 | smc_curs_add(size: conn->sndbuf_desc->len, |
997 | curs: &conn->tx_curs_sent, value: diff); |
998 | smc_curs_add(size: conn->sndbuf_desc->len, |
999 | curs: &conn->tx_curs_fin, value: diff); |
1000 | |
1001 | smp_mb__before_atomic(); |
1002 | atomic_add(i: diff, v: &conn->sndbuf_space); |
1003 | smp_mb__after_atomic(); |
1004 | |
1005 | smc_curs_add(size: conn->peer_rmbe_size, |
1006 | curs: &conn->local_tx_ctrl.prod, value: diff); |
1007 | smc_curs_add(size: conn->peer_rmbe_size, |
1008 | curs: &conn->local_tx_ctrl_fin, value: diff); |
1009 | } |
1010 | /* recalculate, value is used by tx_rdma_writes() */ |
1011 | atomic_set(v: &smc->conn.peer_rmbe_space, i: smc_write_space(conn)); |
1012 | |
1013 | if (smc->sk.sk_state != SMC_INIT && |
1014 | smc->sk.sk_state != SMC_CLOSED) { |
1015 | rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf); |
1016 | if (!rc) { |
1017 | queue_delayed_work(wq: conn->lgr->tx_wq, dwork: &conn->tx_work, delay: 0); |
1018 | smc->sk.sk_data_ready(&smc->sk); |
1019 | } |
1020 | } else { |
1021 | smc_wr_tx_put_slot(link: conn->lnk, |
1022 | wr_pend_priv: (struct smc_wr_tx_pend_priv *)pend); |
1023 | } |
1024 | return rc; |
1025 | } |
1026 | |
1027 | void smc_switch_link_and_count(struct smc_connection *conn, |
1028 | struct smc_link *to_lnk) |
1029 | { |
1030 | atomic_dec(v: &conn->lnk->conn_cnt); |
1031 | /* link_hold in smc_conn_create() */ |
1032 | smcr_link_put(lnk: conn->lnk); |
1033 | conn->lnk = to_lnk; |
1034 | atomic_inc(v: &conn->lnk->conn_cnt); |
1035 | /* link_put in smc_conn_free() */ |
1036 | smcr_link_hold(lnk: conn->lnk); |
1037 | } |
1038 | |
1039 | struct smc_link *smc_switch_conns(struct smc_link_group *lgr, |
1040 | struct smc_link *from_lnk, bool is_dev_err) |
1041 | { |
1042 | struct smc_link *to_lnk = NULL; |
1043 | struct smc_cdc_tx_pend *pend; |
1044 | struct smc_connection *conn; |
1045 | struct smc_wr_buf *wr_buf; |
1046 | struct smc_sock *smc; |
1047 | struct rb_node *node; |
1048 | int i, rc = 0; |
1049 | |
1050 | /* link is inactive, wake up tx waiters */ |
1051 | smc_wr_wakeup_tx_wait(lnk: from_lnk); |
1052 | |
1053 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
1054 | if (!smc_link_active(lnk: &lgr->lnk[i]) || i == from_lnk->link_idx) |
1055 | continue; |
1056 | if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev && |
1057 | from_lnk->ibport == lgr->lnk[i].ibport) { |
1058 | continue; |
1059 | } |
1060 | to_lnk = &lgr->lnk[i]; |
1061 | break; |
1062 | } |
1063 | if (!to_lnk || !smc_wr_tx_link_hold(link: to_lnk)) { |
1064 | smc_lgr_terminate_sched(lgr); |
1065 | return NULL; |
1066 | } |
1067 | again: |
1068 | read_lock_bh(&lgr->conns_lock); |
1069 | for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) { |
1070 | conn = rb_entry(node, struct smc_connection, alert_node); |
1071 | if (conn->lnk != from_lnk) |
1072 | continue; |
1073 | smc = container_of(conn, struct smc_sock, conn); |
1074 | /* conn->lnk not yet set in SMC_INIT state */ |
1075 | if (smc->sk.sk_state == SMC_INIT) |
1076 | continue; |
1077 | if (smc->sk.sk_state == SMC_CLOSED || |
1078 | smc->sk.sk_state == SMC_PEERCLOSEWAIT1 || |
1079 | smc->sk.sk_state == SMC_PEERCLOSEWAIT2 || |
1080 | smc->sk.sk_state == SMC_APPFINCLOSEWAIT || |
1081 | smc->sk.sk_state == SMC_APPCLOSEWAIT1 || |
1082 | smc->sk.sk_state == SMC_APPCLOSEWAIT2 || |
1083 | smc->sk.sk_state == SMC_PEERFINCLOSEWAIT || |
1084 | smc->sk.sk_state == SMC_PEERABORTWAIT || |
1085 | smc->sk.sk_state == SMC_PROCESSABORT) { |
1086 | spin_lock_bh(lock: &conn->send_lock); |
1087 | smc_switch_link_and_count(conn, to_lnk); |
1088 | spin_unlock_bh(lock: &conn->send_lock); |
1089 | continue; |
1090 | } |
1091 | sock_hold(sk: &smc->sk); |
1092 | read_unlock_bh(&lgr->conns_lock); |
1093 | /* pre-fetch buffer outside of send_lock, might sleep */ |
1094 | rc = smc_cdc_get_free_slot(conn, link: to_lnk, wr_buf: &wr_buf, NULL, pend: &pend); |
1095 | if (rc) |
1096 | goto err_out; |
1097 | /* avoid race with smcr_tx_sndbuf_nonempty() */ |
1098 | spin_lock_bh(lock: &conn->send_lock); |
1099 | smc_switch_link_and_count(conn, to_lnk); |
1100 | rc = smc_switch_cursor(smc, pend, wr_buf); |
1101 | spin_unlock_bh(lock: &conn->send_lock); |
1102 | sock_put(sk: &smc->sk); |
1103 | if (rc) |
1104 | goto err_out; |
1105 | goto again; |
1106 | } |
1107 | read_unlock_bh(&lgr->conns_lock); |
1108 | smc_wr_tx_link_put(link: to_lnk); |
1109 | return to_lnk; |
1110 | |
1111 | err_out: |
1112 | smcr_link_down_cond_sched(lnk: to_lnk); |
1113 | smc_wr_tx_link_put(link: to_lnk); |
1114 | return NULL; |
1115 | } |
1116 | |
1117 | static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb, |
1118 | struct smc_link_group *lgr) |
1119 | { |
1120 | struct rw_semaphore *lock; /* lock buffer list */ |
1121 | int rc; |
1122 | |
1123 | if (is_rmb && buf_desc->is_conf_rkey && !list_empty(head: &lgr->list)) { |
1124 | /* unregister rmb with peer */ |
1125 | rc = smc_llc_flow_initiate(lgr, type: SMC_LLC_FLOW_RKEY); |
1126 | if (!rc) { |
1127 | /* protect against smc_llc_cli_rkey_exchange() */ |
1128 | down_read(sem: &lgr->llc_conf_mutex); |
1129 | smc_llc_do_delete_rkey(lgr, rmb_desc: buf_desc); |
1130 | buf_desc->is_conf_rkey = false; |
1131 | up_read(sem: &lgr->llc_conf_mutex); |
1132 | smc_llc_flow_stop(lgr, flow: &lgr->llc_flow_lcl); |
1133 | } |
1134 | } |
1135 | |
1136 | if (buf_desc->is_reg_err) { |
1137 | /* buf registration failed, reuse not possible */ |
1138 | lock = is_rmb ? &lgr->rmbs_lock : |
1139 | &lgr->sndbufs_lock; |
1140 | down_write(sem: lock); |
1141 | list_del(entry: &buf_desc->list); |
1142 | up_write(sem: lock); |
1143 | |
1144 | smc_buf_free(lgr, is_rmb, buf_desc); |
1145 | } else { |
1146 | /* memzero_explicit provides potential memory barrier semantics */ |
1147 | memzero_explicit(s: buf_desc->cpu_addr, count: buf_desc->len); |
1148 | WRITE_ONCE(buf_desc->used, 0); |
1149 | } |
1150 | } |
1151 | |
1152 | static void smc_buf_unuse(struct smc_connection *conn, |
1153 | struct smc_link_group *lgr) |
1154 | { |
1155 | if (conn->sndbuf_desc) { |
1156 | if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) { |
1157 | smcr_buf_unuse(buf_desc: conn->sndbuf_desc, is_rmb: false, lgr); |
1158 | } else { |
1159 | memzero_explicit(s: conn->sndbuf_desc->cpu_addr, count: conn->sndbuf_desc->len); |
1160 | WRITE_ONCE(conn->sndbuf_desc->used, 0); |
1161 | } |
1162 | } |
1163 | if (conn->rmb_desc) { |
1164 | if (!lgr->is_smcd) { |
1165 | smcr_buf_unuse(buf_desc: conn->rmb_desc, is_rmb: true, lgr); |
1166 | } else { |
1167 | memzero_explicit(s: conn->rmb_desc->cpu_addr, |
1168 | count: conn->rmb_desc->len + sizeof(struct smcd_cdc_msg)); |
1169 | WRITE_ONCE(conn->rmb_desc->used, 0); |
1170 | } |
1171 | } |
1172 | } |
1173 | |
1174 | /* remove a finished connection from its link group */ |
1175 | void smc_conn_free(struct smc_connection *conn) |
1176 | { |
1177 | struct smc_link_group *lgr = conn->lgr; |
1178 | |
1179 | if (!lgr || conn->freed) |
1180 | /* Connection has never been registered in a |
1181 | * link group, or has already been freed. |
1182 | */ |
1183 | return; |
1184 | |
1185 | conn->freed = 1; |
1186 | if (!smc_conn_lgr_valid(conn)) |
1187 | /* Connection has already unregistered from |
1188 | * link group. |
1189 | */ |
1190 | goto lgr_put; |
1191 | |
1192 | if (lgr->is_smcd) { |
1193 | if (!list_empty(head: &lgr->list)) |
1194 | smc_ism_unset_conn(conn); |
1195 | tasklet_kill(t: &conn->rx_tsklet); |
1196 | } else { |
1197 | smc_cdc_wait_pend_tx_wr(conn); |
1198 | if (current_work() != &conn->abort_work) |
1199 | cancel_work_sync(work: &conn->abort_work); |
1200 | } |
1201 | if (!list_empty(head: &lgr->list)) { |
1202 | smc_buf_unuse(conn, lgr); /* allow buffer reuse */ |
1203 | smc_lgr_unregister_conn(conn); |
1204 | } |
1205 | |
1206 | if (!lgr->conns_num) |
1207 | smc_lgr_schedule_free_work(lgr); |
1208 | lgr_put: |
1209 | if (!lgr->is_smcd) |
1210 | smcr_link_put(lnk: conn->lnk); /* link_hold in smc_conn_create() */ |
1211 | smc_lgr_put(lgr); /* lgr_hold in smc_conn_create() */ |
1212 | } |
1213 | |
1214 | /* unregister a link from a buf_desc */ |
1215 | static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb, |
1216 | struct smc_link *lnk) |
1217 | { |
1218 | if (is_rmb || buf_desc->is_vm) |
1219 | buf_desc->is_reg_mr[lnk->link_idx] = false; |
1220 | if (!buf_desc->is_map_ib[lnk->link_idx]) |
1221 | return; |
1222 | |
1223 | if ((is_rmb || buf_desc->is_vm) && |
1224 | buf_desc->mr[lnk->link_idx]) { |
1225 | smc_ib_put_memory_region(mr: buf_desc->mr[lnk->link_idx]); |
1226 | buf_desc->mr[lnk->link_idx] = NULL; |
1227 | } |
1228 | if (is_rmb) |
1229 | smc_ib_buf_unmap_sg(lnk, buf_slot: buf_desc, data_direction: DMA_FROM_DEVICE); |
1230 | else |
1231 | smc_ib_buf_unmap_sg(lnk, buf_slot: buf_desc, data_direction: DMA_TO_DEVICE); |
1232 | |
1233 | sg_free_table(&buf_desc->sgt[lnk->link_idx]); |
1234 | buf_desc->is_map_ib[lnk->link_idx] = false; |
1235 | } |
1236 | |
1237 | /* unmap all buffers of lgr for a deleted link */ |
1238 | static void smcr_buf_unmap_lgr(struct smc_link *lnk) |
1239 | { |
1240 | struct smc_link_group *lgr = lnk->lgr; |
1241 | struct smc_buf_desc *buf_desc, *bf; |
1242 | int i; |
1243 | |
1244 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
1245 | down_write(sem: &lgr->rmbs_lock); |
1246 | list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) |
1247 | smcr_buf_unmap_link(buf_desc, is_rmb: true, lnk); |
1248 | up_write(sem: &lgr->rmbs_lock); |
1249 | |
1250 | down_write(sem: &lgr->sndbufs_lock); |
1251 | list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], |
1252 | list) |
1253 | smcr_buf_unmap_link(buf_desc, is_rmb: false, lnk); |
1254 | up_write(sem: &lgr->sndbufs_lock); |
1255 | } |
1256 | } |
1257 | |
1258 | static void smcr_rtoken_clear_link(struct smc_link *lnk) |
1259 | { |
1260 | struct smc_link_group *lgr = lnk->lgr; |
1261 | int i; |
1262 | |
1263 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { |
1264 | lgr->rtokens[i][lnk->link_idx].rkey = 0; |
1265 | lgr->rtokens[i][lnk->link_idx].dma_addr = 0; |
1266 | } |
1267 | } |
1268 | |
1269 | static void __smcr_link_clear(struct smc_link *lnk) |
1270 | { |
1271 | struct smc_link_group *lgr = lnk->lgr; |
1272 | struct smc_ib_device *smcibdev; |
1273 | |
1274 | smc_wr_free_link_mem(lnk); |
1275 | smc_ibdev_cnt_dec(lnk); |
1276 | put_device(dev: &lnk->smcibdev->ibdev->dev); |
1277 | smcibdev = lnk->smcibdev; |
1278 | memset(lnk, 0, sizeof(struct smc_link)); |
1279 | lnk->state = SMC_LNK_UNUSED; |
1280 | if (!atomic_dec_return(v: &smcibdev->lnk_cnt)) |
1281 | wake_up(&smcibdev->lnks_deleted); |
1282 | smc_lgr_put(lgr); /* lgr_hold in smcr_link_init() */ |
1283 | } |
1284 | |
1285 | /* must be called under lgr->llc_conf_mutex lock */ |
1286 | void smcr_link_clear(struct smc_link *lnk, bool log) |
1287 | { |
1288 | if (!lnk->lgr || lnk->clearing || |
1289 | lnk->state == SMC_LNK_UNUSED) |
1290 | return; |
1291 | lnk->clearing = 1; |
1292 | lnk->peer_qpn = 0; |
1293 | smc_llc_link_clear(link: lnk, log); |
1294 | smcr_buf_unmap_lgr(lnk); |
1295 | smcr_rtoken_clear_link(lnk); |
1296 | smc_ib_modify_qp_error(lnk); |
1297 | smc_wr_free_link(lnk); |
1298 | smc_ib_destroy_queue_pair(lnk); |
1299 | smc_ib_dealloc_protection_domain(lnk); |
1300 | smcr_link_put(lnk); /* theoretically last link_put */ |
1301 | } |
1302 | |
1303 | void smcr_link_hold(struct smc_link *lnk) |
1304 | { |
1305 | refcount_inc(r: &lnk->refcnt); |
1306 | } |
1307 | |
1308 | void smcr_link_put(struct smc_link *lnk) |
1309 | { |
1310 | if (refcount_dec_and_test(r: &lnk->refcnt)) |
1311 | __smcr_link_clear(lnk); |
1312 | } |
1313 | |
1314 | static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb, |
1315 | struct smc_buf_desc *buf_desc) |
1316 | { |
1317 | int i; |
1318 | |
1319 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) |
1320 | smcr_buf_unmap_link(buf_desc, is_rmb, lnk: &lgr->lnk[i]); |
1321 | |
1322 | if (!buf_desc->is_vm && buf_desc->pages) |
1323 | __free_pages(page: buf_desc->pages, order: buf_desc->order); |
1324 | else if (buf_desc->is_vm && buf_desc->cpu_addr) |
1325 | vfree(addr: buf_desc->cpu_addr); |
1326 | kfree(objp: buf_desc); |
1327 | } |
1328 | |
1329 | static void smcd_buf_free(struct smc_link_group *lgr, bool is_dmb, |
1330 | struct smc_buf_desc *buf_desc) |
1331 | { |
1332 | if (is_dmb) { |
1333 | /* restore original buf len */ |
1334 | buf_desc->len += sizeof(struct smcd_cdc_msg); |
1335 | smc_ism_unregister_dmb(dev: lgr->smcd, dmb_desc: buf_desc); |
1336 | } else { |
1337 | kfree(objp: buf_desc->cpu_addr); |
1338 | } |
1339 | kfree(objp: buf_desc); |
1340 | } |
1341 | |
1342 | static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, |
1343 | struct smc_buf_desc *buf_desc) |
1344 | { |
1345 | if (lgr->is_smcd) |
1346 | smcd_buf_free(lgr, is_dmb: is_rmb, buf_desc); |
1347 | else |
1348 | smcr_buf_free(lgr, is_rmb, buf_desc); |
1349 | } |
1350 | |
1351 | static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) |
1352 | { |
1353 | struct smc_buf_desc *buf_desc, *bf_desc; |
1354 | struct list_head *buf_list; |
1355 | int i; |
1356 | |
1357 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
1358 | if (is_rmb) |
1359 | buf_list = &lgr->rmbs[i]; |
1360 | else |
1361 | buf_list = &lgr->sndbufs[i]; |
1362 | list_for_each_entry_safe(buf_desc, bf_desc, buf_list, |
1363 | list) { |
1364 | list_del(entry: &buf_desc->list); |
1365 | smc_buf_free(lgr, is_rmb, buf_desc); |
1366 | } |
1367 | } |
1368 | } |
1369 | |
1370 | static void smc_lgr_free_bufs(struct smc_link_group *lgr) |
1371 | { |
1372 | /* free send buffers */ |
1373 | __smc_lgr_free_bufs(lgr, is_rmb: false); |
1374 | /* free rmbs */ |
1375 | __smc_lgr_free_bufs(lgr, is_rmb: true); |
1376 | } |
1377 | |
1378 | /* won't be freed until no one accesses to lgr anymore */ |
1379 | static void __smc_lgr_free(struct smc_link_group *lgr) |
1380 | { |
1381 | smc_lgr_free_bufs(lgr); |
1382 | if (lgr->is_smcd) { |
1383 | if (!atomic_dec_return(v: &lgr->smcd->lgr_cnt)) |
1384 | wake_up(&lgr->smcd->lgrs_deleted); |
1385 | } else { |
1386 | smc_wr_free_lgr_mem(lgr); |
1387 | if (!atomic_dec_return(v: &lgr_cnt)) |
1388 | wake_up(&lgrs_deleted); |
1389 | } |
1390 | kfree(objp: lgr); |
1391 | } |
1392 | |
1393 | /* remove a link group */ |
1394 | static void smc_lgr_free(struct smc_link_group *lgr) |
1395 | { |
1396 | int i; |
1397 | |
1398 | if (!lgr->is_smcd) { |
1399 | down_write(sem: &lgr->llc_conf_mutex); |
1400 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
1401 | if (lgr->lnk[i].state != SMC_LNK_UNUSED) |
1402 | smcr_link_clear(lnk: &lgr->lnk[i], log: false); |
1403 | } |
1404 | up_write(sem: &lgr->llc_conf_mutex); |
1405 | smc_llc_lgr_clear(lgr); |
1406 | } |
1407 | |
1408 | destroy_workqueue(wq: lgr->tx_wq); |
1409 | if (lgr->is_smcd) { |
1410 | smc_ism_put_vlan(dev: lgr->smcd, vlan_id: lgr->vlan_id); |
1411 | put_device(dev: lgr->smcd->ops->get_dev(lgr->smcd)); |
1412 | } |
1413 | smc_lgr_put(lgr); /* theoretically last lgr_put */ |
1414 | } |
1415 | |
1416 | void smc_lgr_hold(struct smc_link_group *lgr) |
1417 | { |
1418 | refcount_inc(r: &lgr->refcnt); |
1419 | } |
1420 | |
1421 | void smc_lgr_put(struct smc_link_group *lgr) |
1422 | { |
1423 | if (refcount_dec_and_test(r: &lgr->refcnt)) |
1424 | __smc_lgr_free(lgr); |
1425 | } |
1426 | |
1427 | static void smc_sk_wake_ups(struct smc_sock *smc) |
1428 | { |
1429 | smc->sk.sk_write_space(&smc->sk); |
1430 | smc->sk.sk_data_ready(&smc->sk); |
1431 | smc->sk.sk_state_change(&smc->sk); |
1432 | } |
1433 | |
1434 | /* kill a connection */ |
1435 | static void smc_conn_kill(struct smc_connection *conn, bool soft) |
1436 | { |
1437 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
1438 | |
1439 | if (conn->lgr->is_smcd && conn->lgr->peer_shutdown) |
1440 | conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1; |
1441 | else |
1442 | smc_close_abort(conn); |
1443 | conn->killed = 1; |
1444 | smc->sk.sk_err = ECONNABORTED; |
1445 | smc_sk_wake_ups(smc); |
1446 | if (conn->lgr->is_smcd) { |
1447 | smc_ism_unset_conn(conn); |
1448 | if (soft) |
1449 | tasklet_kill(t: &conn->rx_tsklet); |
1450 | else |
1451 | tasklet_unlock_wait(t: &conn->rx_tsklet); |
1452 | } else { |
1453 | smc_cdc_wait_pend_tx_wr(conn); |
1454 | } |
1455 | smc_lgr_unregister_conn(conn); |
1456 | smc_close_active_abort(smc); |
1457 | } |
1458 | |
1459 | static void smc_lgr_cleanup(struct smc_link_group *lgr) |
1460 | { |
1461 | if (lgr->is_smcd) { |
1462 | smc_ism_signal_shutdown(lgr); |
1463 | } else { |
1464 | u32 rsn = lgr->llc_termination_rsn; |
1465 | |
1466 | if (!rsn) |
1467 | rsn = SMC_LLC_DEL_PROG_INIT_TERM; |
1468 | smc_llc_send_link_delete_all(lgr, ord: false, rsn); |
1469 | smcr_lgr_link_deactivate_all(lgr); |
1470 | } |
1471 | } |
1472 | |
1473 | /* terminate link group |
1474 | * @soft: true if link group shutdown can take its time |
1475 | * false if immediate link group shutdown is required |
1476 | */ |
1477 | static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft) |
1478 | { |
1479 | struct smc_connection *conn; |
1480 | struct smc_sock *smc; |
1481 | struct rb_node *node; |
1482 | |
1483 | if (lgr->terminating) |
1484 | return; /* lgr already terminating */ |
1485 | /* cancel free_work sync, will terminate when lgr->freeing is set */ |
1486 | cancel_delayed_work(dwork: &lgr->free_work); |
1487 | lgr->terminating = 1; |
1488 | |
1489 | /* kill remaining link group connections */ |
1490 | read_lock_bh(&lgr->conns_lock); |
1491 | node = rb_first(&lgr->conns_all); |
1492 | while (node) { |
1493 | read_unlock_bh(&lgr->conns_lock); |
1494 | conn = rb_entry(node, struct smc_connection, alert_node); |
1495 | smc = container_of(conn, struct smc_sock, conn); |
1496 | sock_hold(sk: &smc->sk); /* sock_put below */ |
1497 | lock_sock(sk: &smc->sk); |
1498 | smc_conn_kill(conn, soft); |
1499 | release_sock(sk: &smc->sk); |
1500 | sock_put(sk: &smc->sk); /* sock_hold above */ |
1501 | read_lock_bh(&lgr->conns_lock); |
1502 | node = rb_first(&lgr->conns_all); |
1503 | } |
1504 | read_unlock_bh(&lgr->conns_lock); |
1505 | smc_lgr_cleanup(lgr); |
1506 | smc_lgr_free(lgr); |
1507 | } |
1508 | |
1509 | /* unlink link group and schedule termination */ |
1510 | void smc_lgr_terminate_sched(struct smc_link_group *lgr) |
1511 | { |
1512 | spinlock_t *lgr_lock; |
1513 | |
1514 | smc_lgr_list_head(lgr, lgr_lock: &lgr_lock); |
1515 | spin_lock_bh(lock: lgr_lock); |
1516 | if (list_empty(head: &lgr->list) || lgr->terminating || lgr->freeing) { |
1517 | spin_unlock_bh(lock: lgr_lock); |
1518 | return; /* lgr already terminating */ |
1519 | } |
1520 | list_del_init(entry: &lgr->list); |
1521 | lgr->freeing = 1; |
1522 | spin_unlock_bh(lock: lgr_lock); |
1523 | schedule_work(work: &lgr->terminate_work); |
1524 | } |
1525 | |
1526 | /* Called when peer lgr shutdown (regularly or abnormally) is received */ |
1527 | void smc_smcd_terminate(struct smcd_dev *dev, struct smcd_gid *peer_gid, |
1528 | unsigned short vlan) |
1529 | { |
1530 | struct smc_link_group *lgr, *l; |
1531 | LIST_HEAD(lgr_free_list); |
1532 | |
1533 | /* run common cleanup function and build free list */ |
1534 | spin_lock_bh(lock: &dev->lgr_lock); |
1535 | list_for_each_entry_safe(lgr, l, &dev->lgr_list, list) { |
1536 | if ((!peer_gid->gid || |
1537 | (lgr->peer_gid.gid == peer_gid->gid && |
1538 | !smc_ism_is_emulated(smcd: dev) ? 1 : |
1539 | lgr->peer_gid.gid_ext == peer_gid->gid_ext)) && |
1540 | (vlan == VLAN_VID_MASK || lgr->vlan_id == vlan)) { |
1541 | if (peer_gid->gid) /* peer triggered termination */ |
1542 | lgr->peer_shutdown = 1; |
1543 | list_move(list: &lgr->list, head: &lgr_free_list); |
1544 | lgr->freeing = 1; |
1545 | } |
1546 | } |
1547 | spin_unlock_bh(lock: &dev->lgr_lock); |
1548 | |
1549 | /* cancel the regular free workers and actually free lgrs */ |
1550 | list_for_each_entry_safe(lgr, l, &lgr_free_list, list) { |
1551 | list_del_init(entry: &lgr->list); |
1552 | schedule_work(work: &lgr->terminate_work); |
1553 | } |
1554 | } |
1555 | |
1556 | /* Called when an SMCD device is removed or the smc module is unloaded */ |
1557 | void smc_smcd_terminate_all(struct smcd_dev *smcd) |
1558 | { |
1559 | struct smc_link_group *lgr, *lg; |
1560 | LIST_HEAD(lgr_free_list); |
1561 | |
1562 | spin_lock_bh(lock: &smcd->lgr_lock); |
1563 | list_splice_init(list: &smcd->lgr_list, head: &lgr_free_list); |
1564 | list_for_each_entry(lgr, &lgr_free_list, list) |
1565 | lgr->freeing = 1; |
1566 | spin_unlock_bh(lock: &smcd->lgr_lock); |
1567 | |
1568 | list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { |
1569 | list_del_init(entry: &lgr->list); |
1570 | __smc_lgr_terminate(lgr, soft: false); |
1571 | } |
1572 | |
1573 | if (atomic_read(v: &smcd->lgr_cnt)) |
1574 | wait_event(smcd->lgrs_deleted, !atomic_read(&smcd->lgr_cnt)); |
1575 | } |
1576 | |
1577 | /* Called when an SMCR device is removed or the smc module is unloaded. |
1578 | * If smcibdev is given, all SMCR link groups using this device are terminated. |
1579 | * If smcibdev is NULL, all SMCR link groups are terminated. |
1580 | */ |
1581 | void smc_smcr_terminate_all(struct smc_ib_device *smcibdev) |
1582 | { |
1583 | struct smc_link_group *lgr, *lg; |
1584 | LIST_HEAD(lgr_free_list); |
1585 | int i; |
1586 | |
1587 | spin_lock_bh(lock: &smc_lgr_list.lock); |
1588 | if (!smcibdev) { |
1589 | list_splice_init(list: &smc_lgr_list.list, head: &lgr_free_list); |
1590 | list_for_each_entry(lgr, &lgr_free_list, list) |
1591 | lgr->freeing = 1; |
1592 | } else { |
1593 | list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) { |
1594 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
1595 | if (lgr->lnk[i].smcibdev == smcibdev) |
1596 | smcr_link_down_cond_sched(lnk: &lgr->lnk[i]); |
1597 | } |
1598 | } |
1599 | } |
1600 | spin_unlock_bh(lock: &smc_lgr_list.lock); |
1601 | |
1602 | list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) { |
1603 | list_del_init(entry: &lgr->list); |
1604 | smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM); |
1605 | __smc_lgr_terminate(lgr, soft: false); |
1606 | } |
1607 | |
1608 | if (smcibdev) { |
1609 | if (atomic_read(v: &smcibdev->lnk_cnt)) |
1610 | wait_event(smcibdev->lnks_deleted, |
1611 | !atomic_read(&smcibdev->lnk_cnt)); |
1612 | } else { |
1613 | if (atomic_read(v: &lgr_cnt)) |
1614 | wait_event(lgrs_deleted, !atomic_read(&lgr_cnt)); |
1615 | } |
1616 | } |
1617 | |
1618 | /* set new lgr type and clear all asymmetric link tagging */ |
1619 | void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type) |
1620 | { |
1621 | char *lgr_type = "" ; |
1622 | int i; |
1623 | |
1624 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) |
1625 | if (smc_link_usable(lnk: &lgr->lnk[i])) |
1626 | lgr->lnk[i].link_is_asym = false; |
1627 | if (lgr->type == new_type) |
1628 | return; |
1629 | lgr->type = new_type; |
1630 | |
1631 | switch (lgr->type) { |
1632 | case SMC_LGR_NONE: |
1633 | lgr_type = "NONE" ; |
1634 | break; |
1635 | case SMC_LGR_SINGLE: |
1636 | lgr_type = "SINGLE" ; |
1637 | break; |
1638 | case SMC_LGR_SYMMETRIC: |
1639 | lgr_type = "SYMMETRIC" ; |
1640 | break; |
1641 | case SMC_LGR_ASYMMETRIC_PEER: |
1642 | lgr_type = "ASYMMETRIC_PEER" ; |
1643 | break; |
1644 | case SMC_LGR_ASYMMETRIC_LOCAL: |
1645 | lgr_type = "ASYMMETRIC_LOCAL" ; |
1646 | break; |
1647 | } |
1648 | pr_warn_ratelimited("smc: SMC-R lg %*phN net %llu state changed: " |
1649 | "%s, pnetid %.16s\n" , SMC_LGR_ID_SIZE, &lgr->id, |
1650 | lgr->net->net_cookie, lgr_type, lgr->pnet_id); |
1651 | } |
1652 | |
1653 | /* set new lgr type and tag a link as asymmetric */ |
1654 | void smcr_lgr_set_type_asym(struct smc_link_group *lgr, |
1655 | enum smc_lgr_type new_type, int asym_lnk_idx) |
1656 | { |
1657 | smcr_lgr_set_type(lgr, new_type); |
1658 | lgr->lnk[asym_lnk_idx].link_is_asym = true; |
1659 | } |
1660 | |
1661 | /* abort connection, abort_work scheduled from tasklet context */ |
1662 | static void smc_conn_abort_work(struct work_struct *work) |
1663 | { |
1664 | struct smc_connection *conn = container_of(work, |
1665 | struct smc_connection, |
1666 | abort_work); |
1667 | struct smc_sock *smc = container_of(conn, struct smc_sock, conn); |
1668 | |
1669 | lock_sock(sk: &smc->sk); |
1670 | smc_conn_kill(conn, soft: true); |
1671 | release_sock(sk: &smc->sk); |
1672 | sock_put(sk: &smc->sk); /* sock_hold done by schedulers of abort_work */ |
1673 | } |
1674 | |
1675 | void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) |
1676 | { |
1677 | struct smc_link_group *lgr, *n; |
1678 | |
1679 | spin_lock_bh(lock: &smc_lgr_list.lock); |
1680 | list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { |
1681 | struct smc_link *link; |
1682 | |
1683 | if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, |
1684 | SMC_MAX_PNETID_LEN) || |
1685 | lgr->type == SMC_LGR_SYMMETRIC || |
1686 | lgr->type == SMC_LGR_ASYMMETRIC_PEER || |
1687 | !rdma_dev_access_netns(device: smcibdev->ibdev, net: lgr->net)) |
1688 | continue; |
1689 | |
1690 | if (lgr->type == SMC_LGR_SINGLE && lgr->max_links <= 1) |
1691 | continue; |
1692 | |
1693 | /* trigger local add link processing */ |
1694 | link = smc_llc_usable_link(lgr); |
1695 | if (link) |
1696 | smc_llc_add_link_local(link); |
1697 | } |
1698 | spin_unlock_bh(lock: &smc_lgr_list.lock); |
1699 | } |
1700 | |
1701 | /* link is down - switch connections to alternate link, |
1702 | * must be called under lgr->llc_conf_mutex lock |
1703 | */ |
1704 | static void smcr_link_down(struct smc_link *lnk) |
1705 | { |
1706 | struct smc_link_group *lgr = lnk->lgr; |
1707 | struct smc_link *to_lnk; |
1708 | int del_link_id; |
1709 | |
1710 | if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(head: &lgr->list)) |
1711 | return; |
1712 | |
1713 | to_lnk = smc_switch_conns(lgr, from_lnk: lnk, is_dev_err: true); |
1714 | if (!to_lnk) { /* no backup link available */ |
1715 | smcr_link_clear(lnk, log: true); |
1716 | return; |
1717 | } |
1718 | smcr_lgr_set_type(lgr, new_type: SMC_LGR_SINGLE); |
1719 | del_link_id = lnk->link_id; |
1720 | |
1721 | if (lgr->role == SMC_SERV) { |
1722 | /* trigger local delete link processing */ |
1723 | smc_llc_srv_delete_link_local(link: to_lnk, del_link_id); |
1724 | } else { |
1725 | if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { |
1726 | /* another llc task is ongoing */ |
1727 | up_write(sem: &lgr->llc_conf_mutex); |
1728 | wait_event_timeout(lgr->llc_flow_waiter, |
1729 | (list_empty(&lgr->list) || |
1730 | lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), |
1731 | SMC_LLC_WAIT_TIME); |
1732 | down_write(sem: &lgr->llc_conf_mutex); |
1733 | } |
1734 | if (!list_empty(head: &lgr->list)) { |
1735 | smc_llc_send_delete_link(link: to_lnk, link_del_id: del_link_id, |
1736 | reqresp: SMC_LLC_REQ, orderly: true, |
1737 | SMC_LLC_DEL_LOST_PATH); |
1738 | smcr_link_clear(lnk, log: true); |
1739 | } |
1740 | wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ |
1741 | } |
1742 | } |
1743 | |
1744 | /* must be called under lgr->llc_conf_mutex lock */ |
1745 | void smcr_link_down_cond(struct smc_link *lnk) |
1746 | { |
1747 | if (smc_link_downing(&lnk->state)) { |
1748 | trace_smcr_link_down(lnk, location: __builtin_return_address(0)); |
1749 | smcr_link_down(lnk); |
1750 | } |
1751 | } |
1752 | |
1753 | /* will get the lgr->llc_conf_mutex lock */ |
1754 | void smcr_link_down_cond_sched(struct smc_link *lnk) |
1755 | { |
1756 | if (smc_link_downing(&lnk->state)) { |
1757 | trace_smcr_link_down(lnk, location: __builtin_return_address(0)); |
1758 | schedule_work(work: &lnk->link_down_wrk); |
1759 | } |
1760 | } |
1761 | |
1762 | void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport) |
1763 | { |
1764 | struct smc_link_group *lgr, *n; |
1765 | int i; |
1766 | |
1767 | list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { |
1768 | if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, |
1769 | SMC_MAX_PNETID_LEN)) |
1770 | continue; /* lgr is not affected */ |
1771 | if (list_empty(head: &lgr->list)) |
1772 | continue; |
1773 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
1774 | struct smc_link *lnk = &lgr->lnk[i]; |
1775 | |
1776 | if (smc_link_usable(lnk) && |
1777 | lnk->smcibdev == smcibdev && lnk->ibport == ibport) |
1778 | smcr_link_down_cond_sched(lnk); |
1779 | } |
1780 | } |
1781 | } |
1782 | |
1783 | static void smc_link_down_work(struct work_struct *work) |
1784 | { |
1785 | struct smc_link *link = container_of(work, struct smc_link, |
1786 | link_down_wrk); |
1787 | struct smc_link_group *lgr = link->lgr; |
1788 | |
1789 | if (list_empty(head: &lgr->list)) |
1790 | return; |
1791 | wake_up_all(&lgr->llc_msg_waiter); |
1792 | down_write(sem: &lgr->llc_conf_mutex); |
1793 | smcr_link_down(lnk: link); |
1794 | up_write(sem: &lgr->llc_conf_mutex); |
1795 | } |
1796 | |
1797 | static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev, |
1798 | struct netdev_nested_priv *priv) |
1799 | { |
1800 | unsigned short *vlan_id = (unsigned short *)priv->data; |
1801 | |
1802 | if (is_vlan_dev(dev: lower_dev)) { |
1803 | *vlan_id = vlan_dev_vlan_id(dev: lower_dev); |
1804 | return 1; |
1805 | } |
1806 | |
1807 | return 0; |
1808 | } |
1809 | |
1810 | /* Determine vlan of internal TCP socket. */ |
1811 | int smc_vlan_by_tcpsk(struct socket *clcsock, struct smc_init_info *ini) |
1812 | { |
1813 | struct dst_entry *dst = sk_dst_get(sk: clcsock->sk); |
1814 | struct netdev_nested_priv priv; |
1815 | struct net_device *ndev; |
1816 | int rc = 0; |
1817 | |
1818 | ini->vlan_id = 0; |
1819 | if (!dst) { |
1820 | rc = -ENOTCONN; |
1821 | goto out; |
1822 | } |
1823 | if (!dst->dev) { |
1824 | rc = -ENODEV; |
1825 | goto out_rel; |
1826 | } |
1827 | |
1828 | ndev = dst->dev; |
1829 | if (is_vlan_dev(dev: ndev)) { |
1830 | ini->vlan_id = vlan_dev_vlan_id(dev: ndev); |
1831 | goto out_rel; |
1832 | } |
1833 | |
1834 | priv.data = (void *)&ini->vlan_id; |
1835 | rtnl_lock(); |
1836 | netdev_walk_all_lower_dev(dev: ndev, fn: smc_vlan_by_tcpsk_walk, priv: &priv); |
1837 | rtnl_unlock(); |
1838 | |
1839 | out_rel: |
1840 | dst_release(dst); |
1841 | out: |
1842 | return rc; |
1843 | } |
1844 | |
1845 | static bool smcr_lgr_match(struct smc_link_group *lgr, u8 smcr_version, |
1846 | u8 peer_systemid[], |
1847 | u8 peer_gid[], |
1848 | u8 peer_mac_v1[], |
1849 | enum smc_lgr_role role, u32 clcqpn, |
1850 | struct net *net) |
1851 | { |
1852 | struct smc_link *lnk; |
1853 | int i; |
1854 | |
1855 | if (memcmp(p: lgr->peer_systemid, q: peer_systemid, SMC_SYSTEMID_LEN) || |
1856 | lgr->role != role) |
1857 | return false; |
1858 | |
1859 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
1860 | lnk = &lgr->lnk[i]; |
1861 | |
1862 | if (!smc_link_active(lnk)) |
1863 | continue; |
1864 | /* use verbs API to check netns, instead of lgr->net */ |
1865 | if (!rdma_dev_access_netns(device: lnk->smcibdev->ibdev, net)) |
1866 | return false; |
1867 | if ((lgr->role == SMC_SERV || lnk->peer_qpn == clcqpn) && |
1868 | !memcmp(p: lnk->peer_gid, q: peer_gid, SMC_GID_SIZE) && |
1869 | (smcr_version == SMC_V2 || |
1870 | !memcmp(p: lnk->peer_mac, q: peer_mac_v1, ETH_ALEN))) |
1871 | return true; |
1872 | } |
1873 | return false; |
1874 | } |
1875 | |
1876 | static bool smcd_lgr_match(struct smc_link_group *lgr, |
1877 | struct smcd_dev *smcismdev, |
1878 | struct smcd_gid *peer_gid) |
1879 | { |
1880 | if (lgr->peer_gid.gid != peer_gid->gid || |
1881 | lgr->smcd != smcismdev) |
1882 | return false; |
1883 | |
1884 | if (smc_ism_is_emulated(smcd: smcismdev) && |
1885 | lgr->peer_gid.gid_ext != peer_gid->gid_ext) |
1886 | return false; |
1887 | |
1888 | return true; |
1889 | } |
1890 | |
1891 | /* create a new SMC connection (and a new link group if necessary) */ |
1892 | int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) |
1893 | { |
1894 | struct smc_connection *conn = &smc->conn; |
1895 | struct net *net = sock_net(sk: &smc->sk); |
1896 | struct list_head *lgr_list; |
1897 | struct smc_link_group *lgr; |
1898 | enum smc_lgr_role role; |
1899 | spinlock_t *lgr_lock; |
1900 | int rc = 0; |
1901 | |
1902 | lgr_list = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_list : |
1903 | &smc_lgr_list.list; |
1904 | lgr_lock = ini->is_smcd ? &ini->ism_dev[ini->ism_selected]->lgr_lock : |
1905 | &smc_lgr_list.lock; |
1906 | ini->first_contact_local = 1; |
1907 | role = smc->listen_smc ? SMC_SERV : SMC_CLNT; |
1908 | if (role == SMC_CLNT && ini->first_contact_peer) |
1909 | /* create new link group as well */ |
1910 | goto create; |
1911 | |
1912 | /* determine if an existing link group can be reused */ |
1913 | spin_lock_bh(lock: lgr_lock); |
1914 | list_for_each_entry(lgr, lgr_list, list) { |
1915 | write_lock_bh(&lgr->conns_lock); |
1916 | if ((ini->is_smcd ? |
1917 | smcd_lgr_match(lgr, smcismdev: ini->ism_dev[ini->ism_selected], |
1918 | peer_gid: &ini->ism_peer_gid[ini->ism_selected]) : |
1919 | smcr_lgr_match(lgr, smcr_version: ini->smcr_version, |
1920 | peer_systemid: ini->peer_systemid, |
1921 | peer_gid: ini->peer_gid, peer_mac_v1: ini->peer_mac, role, |
1922 | clcqpn: ini->ib_clcqpn, net)) && |
1923 | !lgr->sync_err && |
1924 | (ini->smcd_version == SMC_V2 || |
1925 | lgr->vlan_id == ini->vlan_id) && |
1926 | (role == SMC_CLNT || ini->is_smcd || |
1927 | (lgr->conns_num < lgr->max_conns && |
1928 | !bitmap_full(src: lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX)))) { |
1929 | /* link group found */ |
1930 | ini->first_contact_local = 0; |
1931 | conn->lgr = lgr; |
1932 | rc = smc_lgr_register_conn(conn, first: false); |
1933 | write_unlock_bh(&lgr->conns_lock); |
1934 | if (!rc && delayed_work_pending(&lgr->free_work)) |
1935 | cancel_delayed_work(dwork: &lgr->free_work); |
1936 | break; |
1937 | } |
1938 | write_unlock_bh(&lgr->conns_lock); |
1939 | } |
1940 | spin_unlock_bh(lock: lgr_lock); |
1941 | if (rc) |
1942 | return rc; |
1943 | |
1944 | if (role == SMC_CLNT && !ini->first_contact_peer && |
1945 | ini->first_contact_local) { |
1946 | /* Server reuses a link group, but Client wants to start |
1947 | * a new one |
1948 | * send out_of_sync decline, reason synchr. error |
1949 | */ |
1950 | return SMC_CLC_DECL_SYNCERR; |
1951 | } |
1952 | |
1953 | create: |
1954 | if (ini->first_contact_local) { |
1955 | rc = smc_lgr_create(smc, ini); |
1956 | if (rc) |
1957 | goto out; |
1958 | lgr = conn->lgr; |
1959 | write_lock_bh(&lgr->conns_lock); |
1960 | rc = smc_lgr_register_conn(conn, first: true); |
1961 | write_unlock_bh(&lgr->conns_lock); |
1962 | if (rc) { |
1963 | smc_lgr_cleanup_early(lgr); |
1964 | goto out; |
1965 | } |
1966 | } |
1967 | smc_lgr_hold(lgr: conn->lgr); /* lgr_put in smc_conn_free() */ |
1968 | if (!conn->lgr->is_smcd) |
1969 | smcr_link_hold(lnk: conn->lnk); /* link_put in smc_conn_free() */ |
1970 | conn->freed = 0; |
1971 | conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE; |
1972 | conn->local_tx_ctrl.len = SMC_WR_TX_SIZE; |
1973 | conn->urg_state = SMC_URG_READ; |
1974 | init_waitqueue_head(&conn->cdc_pend_tx_wq); |
1975 | INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work); |
1976 | if (ini->is_smcd) { |
1977 | conn->rx_off = sizeof(struct smcd_cdc_msg); |
1978 | smcd_cdc_rx_init(conn); /* init tasklet for this conn */ |
1979 | } else { |
1980 | conn->rx_off = 0; |
1981 | } |
1982 | #ifndef KERNEL_HAS_ATOMIC64 |
1983 | spin_lock_init(&conn->acurs_lock); |
1984 | #endif |
1985 | |
1986 | out: |
1987 | return rc; |
1988 | } |
1989 | |
1990 | #define SMCD_DMBE_SIZES 6 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */ |
1991 | #define SMCR_RMBE_SIZES 5 /* 0 -> 16KB, 1 -> 32KB, .. 5 -> 512KB */ |
1992 | |
1993 | /* convert the RMB size into the compressed notation (minimum 16K, see |
1994 | * SMCD/R_DMBE_SIZES. |
1995 | * In contrast to plain ilog2, this rounds towards the next power of 2, |
1996 | * so the socket application gets at least its desired sndbuf / rcvbuf size. |
1997 | */ |
1998 | static u8 smc_compress_bufsize(int size, bool is_smcd, bool is_rmb) |
1999 | { |
2000 | const unsigned int max_scat = SG_MAX_SINGLE_ALLOC * PAGE_SIZE; |
2001 | u8 compressed; |
2002 | |
2003 | if (size <= SMC_BUF_MIN_SIZE) |
2004 | return 0; |
2005 | |
2006 | size = (size - 1) >> 14; /* convert to 16K multiple */ |
2007 | compressed = min_t(u8, ilog2(size) + 1, |
2008 | is_smcd ? SMCD_DMBE_SIZES : SMCR_RMBE_SIZES); |
2009 | |
2010 | if (!is_smcd && is_rmb) |
2011 | /* RMBs are backed by & limited to max size of scatterlists */ |
2012 | compressed = min_t(u8, compressed, ilog2(max_scat >> 14)); |
2013 | |
2014 | return compressed; |
2015 | } |
2016 | |
2017 | /* convert the RMB size from compressed notation into integer */ |
2018 | int smc_uncompress_bufsize(u8 compressed) |
2019 | { |
2020 | u32 size; |
2021 | |
2022 | size = 0x00000001 << (((int)compressed) + 14); |
2023 | return (int)size; |
2024 | } |
2025 | |
2026 | /* try to reuse a sndbuf or rmb description slot for a certain |
2027 | * buffer size; if not available, return NULL |
2028 | */ |
2029 | static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize, |
2030 | struct rw_semaphore *lock, |
2031 | struct list_head *buf_list) |
2032 | { |
2033 | struct smc_buf_desc *buf_slot; |
2034 | |
2035 | down_read(sem: lock); |
2036 | list_for_each_entry(buf_slot, buf_list, list) { |
2037 | if (cmpxchg(&buf_slot->used, 0, 1) == 0) { |
2038 | up_read(sem: lock); |
2039 | return buf_slot; |
2040 | } |
2041 | } |
2042 | up_read(sem: lock); |
2043 | return NULL; |
2044 | } |
2045 | |
2046 | /* one of the conditions for announcing a receiver's current window size is |
2047 | * that it "results in a minimum increase in the window size of 10% of the |
2048 | * receive buffer space" [RFC7609] |
2049 | */ |
2050 | static inline int smc_rmb_wnd_update_limit(int rmbe_size) |
2051 | { |
2052 | return max_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); |
2053 | } |
2054 | |
2055 | /* map an buf to a link */ |
2056 | static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb, |
2057 | struct smc_link *lnk) |
2058 | { |
2059 | int rc, i, nents, offset, buf_size, size, access_flags; |
2060 | struct scatterlist *sg; |
2061 | void *buf; |
2062 | |
2063 | if (buf_desc->is_map_ib[lnk->link_idx]) |
2064 | return 0; |
2065 | |
2066 | if (buf_desc->is_vm) { |
2067 | buf = buf_desc->cpu_addr; |
2068 | buf_size = buf_desc->len; |
2069 | offset = offset_in_page(buf_desc->cpu_addr); |
2070 | nents = PAGE_ALIGN(buf_size + offset) / PAGE_SIZE; |
2071 | } else { |
2072 | nents = 1; |
2073 | } |
2074 | |
2075 | rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], nents, GFP_KERNEL); |
2076 | if (rc) |
2077 | return rc; |
2078 | |
2079 | if (buf_desc->is_vm) { |
2080 | /* virtually contiguous buffer */ |
2081 | for_each_sg(buf_desc->sgt[lnk->link_idx].sgl, sg, nents, i) { |
2082 | size = min_t(int, PAGE_SIZE - offset, buf_size); |
2083 | sg_set_page(sg, page: vmalloc_to_page(addr: buf), len: size, offset); |
2084 | buf += size / sizeof(*buf); |
2085 | buf_size -= size; |
2086 | offset = 0; |
2087 | } |
2088 | } else { |
2089 | /* physically contiguous buffer */ |
2090 | sg_set_buf(sg: buf_desc->sgt[lnk->link_idx].sgl, |
2091 | buf: buf_desc->cpu_addr, buflen: buf_desc->len); |
2092 | } |
2093 | |
2094 | /* map sg table to DMA address */ |
2095 | rc = smc_ib_buf_map_sg(lnk, buf_slot: buf_desc, |
2096 | data_direction: is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); |
2097 | /* SMC protocol depends on mapping to one DMA address only */ |
2098 | if (rc != nents) { |
2099 | rc = -EAGAIN; |
2100 | goto free_table; |
2101 | } |
2102 | |
2103 | buf_desc->is_dma_need_sync |= |
2104 | smc_ib_is_sg_need_sync(lnk, buf_slot: buf_desc) << lnk->link_idx; |
2105 | |
2106 | if (is_rmb || buf_desc->is_vm) { |
2107 | /* create a new memory region for the RMB or vzalloced sndbuf */ |
2108 | access_flags = is_rmb ? |
2109 | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : |
2110 | IB_ACCESS_LOCAL_WRITE; |
2111 | |
2112 | rc = smc_ib_get_memory_region(pd: lnk->roce_pd, access_flags, |
2113 | buf_slot: buf_desc, link_idx: lnk->link_idx); |
2114 | if (rc) |
2115 | goto buf_unmap; |
2116 | smc_ib_sync_sg_for_device(lnk, buf_slot: buf_desc, |
2117 | data_direction: is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); |
2118 | } |
2119 | buf_desc->is_map_ib[lnk->link_idx] = true; |
2120 | return 0; |
2121 | |
2122 | buf_unmap: |
2123 | smc_ib_buf_unmap_sg(lnk, buf_slot: buf_desc, |
2124 | data_direction: is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); |
2125 | free_table: |
2126 | sg_free_table(&buf_desc->sgt[lnk->link_idx]); |
2127 | return rc; |
2128 | } |
2129 | |
2130 | /* register a new buf on IB device, rmb or vzalloced sndbuf |
2131 | * must be called under lgr->llc_conf_mutex lock |
2132 | */ |
2133 | int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc) |
2134 | { |
2135 | if (list_empty(head: &link->lgr->list)) |
2136 | return -ENOLINK; |
2137 | if (!buf_desc->is_reg_mr[link->link_idx]) { |
2138 | /* register memory region for new buf */ |
2139 | if (buf_desc->is_vm) |
2140 | buf_desc->mr[link->link_idx]->iova = |
2141 | (uintptr_t)buf_desc->cpu_addr; |
2142 | if (smc_wr_reg_send(link, mr: buf_desc->mr[link->link_idx])) { |
2143 | buf_desc->is_reg_err = true; |
2144 | return -EFAULT; |
2145 | } |
2146 | buf_desc->is_reg_mr[link->link_idx] = true; |
2147 | } |
2148 | return 0; |
2149 | } |
2150 | |
2151 | static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock, |
2152 | struct list_head *lst, bool is_rmb) |
2153 | { |
2154 | struct smc_buf_desc *buf_desc, *bf; |
2155 | int rc = 0; |
2156 | |
2157 | down_write(sem: lock); |
2158 | list_for_each_entry_safe(buf_desc, bf, lst, list) { |
2159 | if (!buf_desc->used) |
2160 | continue; |
2161 | rc = smcr_buf_map_link(buf_desc, is_rmb, lnk); |
2162 | if (rc) |
2163 | goto out; |
2164 | } |
2165 | out: |
2166 | up_write(sem: lock); |
2167 | return rc; |
2168 | } |
2169 | |
2170 | /* map all used buffers of lgr for a new link */ |
2171 | int smcr_buf_map_lgr(struct smc_link *lnk) |
2172 | { |
2173 | struct smc_link_group *lgr = lnk->lgr; |
2174 | int i, rc = 0; |
2175 | |
2176 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
2177 | rc = _smcr_buf_map_lgr(lnk, lock: &lgr->rmbs_lock, |
2178 | lst: &lgr->rmbs[i], is_rmb: true); |
2179 | if (rc) |
2180 | return rc; |
2181 | rc = _smcr_buf_map_lgr(lnk, lock: &lgr->sndbufs_lock, |
2182 | lst: &lgr->sndbufs[i], is_rmb: false); |
2183 | if (rc) |
2184 | return rc; |
2185 | } |
2186 | return 0; |
2187 | } |
2188 | |
2189 | /* register all used buffers of lgr for a new link, |
2190 | * must be called under lgr->llc_conf_mutex lock |
2191 | */ |
2192 | int smcr_buf_reg_lgr(struct smc_link *lnk) |
2193 | { |
2194 | struct smc_link_group *lgr = lnk->lgr; |
2195 | struct smc_buf_desc *buf_desc, *bf; |
2196 | int i, rc = 0; |
2197 | |
2198 | /* reg all RMBs for a new link */ |
2199 | down_write(sem: &lgr->rmbs_lock); |
2200 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
2201 | list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) { |
2202 | if (!buf_desc->used) |
2203 | continue; |
2204 | rc = smcr_link_reg_buf(link: lnk, buf_desc); |
2205 | if (rc) { |
2206 | up_write(sem: &lgr->rmbs_lock); |
2207 | return rc; |
2208 | } |
2209 | } |
2210 | } |
2211 | up_write(sem: &lgr->rmbs_lock); |
2212 | |
2213 | if (lgr->buf_type == SMCR_PHYS_CONT_BUFS) |
2214 | return rc; |
2215 | |
2216 | /* reg all vzalloced sndbufs for a new link */ |
2217 | down_write(sem: &lgr->sndbufs_lock); |
2218 | for (i = 0; i < SMC_RMBE_SIZES; i++) { |
2219 | list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) { |
2220 | if (!buf_desc->used || !buf_desc->is_vm) |
2221 | continue; |
2222 | rc = smcr_link_reg_buf(link: lnk, buf_desc); |
2223 | if (rc) { |
2224 | up_write(sem: &lgr->sndbufs_lock); |
2225 | return rc; |
2226 | } |
2227 | } |
2228 | } |
2229 | up_write(sem: &lgr->sndbufs_lock); |
2230 | return rc; |
2231 | } |
2232 | |
2233 | static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr, |
2234 | bool is_rmb, int bufsize) |
2235 | { |
2236 | struct smc_buf_desc *buf_desc; |
2237 | |
2238 | /* try to alloc a new buffer */ |
2239 | buf_desc = kzalloc(size: sizeof(*buf_desc), GFP_KERNEL); |
2240 | if (!buf_desc) |
2241 | return ERR_PTR(error: -ENOMEM); |
2242 | |
2243 | switch (lgr->buf_type) { |
2244 | case SMCR_PHYS_CONT_BUFS: |
2245 | case SMCR_MIXED_BUFS: |
2246 | buf_desc->order = get_order(size: bufsize); |
2247 | buf_desc->pages = alloc_pages(GFP_KERNEL | __GFP_NOWARN | |
2248 | __GFP_NOMEMALLOC | __GFP_COMP | |
2249 | __GFP_NORETRY | __GFP_ZERO, |
2250 | order: buf_desc->order); |
2251 | if (buf_desc->pages) { |
2252 | buf_desc->cpu_addr = |
2253 | (void *)page_address(buf_desc->pages); |
2254 | buf_desc->len = bufsize; |
2255 | buf_desc->is_vm = false; |
2256 | break; |
2257 | } |
2258 | if (lgr->buf_type == SMCR_PHYS_CONT_BUFS) |
2259 | goto out; |
2260 | fallthrough; // try virtually continguous buf |
2261 | case SMCR_VIRT_CONT_BUFS: |
2262 | buf_desc->order = get_order(size: bufsize); |
2263 | buf_desc->cpu_addr = vzalloc(PAGE_SIZE << buf_desc->order); |
2264 | if (!buf_desc->cpu_addr) |
2265 | goto out; |
2266 | buf_desc->pages = NULL; |
2267 | buf_desc->len = bufsize; |
2268 | buf_desc->is_vm = true; |
2269 | break; |
2270 | } |
2271 | return buf_desc; |
2272 | |
2273 | out: |
2274 | kfree(objp: buf_desc); |
2275 | return ERR_PTR(error: -EAGAIN); |
2276 | } |
2277 | |
2278 | /* map buf_desc on all usable links, |
2279 | * unused buffers stay mapped as long as the link is up |
2280 | */ |
2281 | static int smcr_buf_map_usable_links(struct smc_link_group *lgr, |
2282 | struct smc_buf_desc *buf_desc, bool is_rmb) |
2283 | { |
2284 | int i, rc = 0, cnt = 0; |
2285 | |
2286 | /* protect against parallel link reconfiguration */ |
2287 | down_read(sem: &lgr->llc_conf_mutex); |
2288 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
2289 | struct smc_link *lnk = &lgr->lnk[i]; |
2290 | |
2291 | if (!smc_link_usable(lnk)) |
2292 | continue; |
2293 | if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) { |
2294 | rc = -ENOMEM; |
2295 | goto out; |
2296 | } |
2297 | cnt++; |
2298 | } |
2299 | out: |
2300 | up_read(sem: &lgr->llc_conf_mutex); |
2301 | if (!rc && !cnt) |
2302 | rc = -EINVAL; |
2303 | return rc; |
2304 | } |
2305 | |
2306 | static struct smc_buf_desc *smcd_new_buf_create(struct smc_link_group *lgr, |
2307 | bool is_dmb, int bufsize) |
2308 | { |
2309 | struct smc_buf_desc *buf_desc; |
2310 | int rc; |
2311 | |
2312 | /* try to alloc a new DMB */ |
2313 | buf_desc = kzalloc(size: sizeof(*buf_desc), GFP_KERNEL); |
2314 | if (!buf_desc) |
2315 | return ERR_PTR(error: -ENOMEM); |
2316 | if (is_dmb) { |
2317 | rc = smc_ism_register_dmb(lgr, buf_size: bufsize, dmb_desc: buf_desc); |
2318 | if (rc) { |
2319 | kfree(objp: buf_desc); |
2320 | if (rc == -ENOMEM) |
2321 | return ERR_PTR(error: -EAGAIN); |
2322 | if (rc == -ENOSPC) |
2323 | return ERR_PTR(error: -ENOSPC); |
2324 | return ERR_PTR(error: -EIO); |
2325 | } |
2326 | buf_desc->pages = virt_to_page(buf_desc->cpu_addr); |
2327 | /* CDC header stored in buf. So, pretend it was smaller */ |
2328 | buf_desc->len = bufsize - sizeof(struct smcd_cdc_msg); |
2329 | } else { |
2330 | buf_desc->cpu_addr = kzalloc(size: bufsize, GFP_KERNEL | |
2331 | __GFP_NOWARN | __GFP_NORETRY | |
2332 | __GFP_NOMEMALLOC); |
2333 | if (!buf_desc->cpu_addr) { |
2334 | kfree(objp: buf_desc); |
2335 | return ERR_PTR(error: -EAGAIN); |
2336 | } |
2337 | buf_desc->len = bufsize; |
2338 | } |
2339 | return buf_desc; |
2340 | } |
2341 | |
2342 | static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) |
2343 | { |
2344 | struct smc_buf_desc *buf_desc = ERR_PTR(error: -ENOMEM); |
2345 | struct smc_connection *conn = &smc->conn; |
2346 | struct smc_link_group *lgr = conn->lgr; |
2347 | struct list_head *buf_list; |
2348 | int bufsize, bufsize_comp; |
2349 | struct rw_semaphore *lock; /* lock buffer list */ |
2350 | bool is_dgraded = false; |
2351 | |
2352 | if (is_rmb) |
2353 | /* use socket recv buffer size (w/o overhead) as start value */ |
2354 | bufsize = smc->sk.sk_rcvbuf / 2; |
2355 | else |
2356 | /* use socket send buffer size (w/o overhead) as start value */ |
2357 | bufsize = smc->sk.sk_sndbuf / 2; |
2358 | |
2359 | for (bufsize_comp = smc_compress_bufsize(size: bufsize, is_smcd, is_rmb); |
2360 | bufsize_comp >= 0; bufsize_comp--) { |
2361 | if (is_rmb) { |
2362 | lock = &lgr->rmbs_lock; |
2363 | buf_list = &lgr->rmbs[bufsize_comp]; |
2364 | } else { |
2365 | lock = &lgr->sndbufs_lock; |
2366 | buf_list = &lgr->sndbufs[bufsize_comp]; |
2367 | } |
2368 | bufsize = smc_uncompress_bufsize(compressed: bufsize_comp); |
2369 | |
2370 | /* check for reusable slot in the link group */ |
2371 | buf_desc = smc_buf_get_slot(compressed_bufsize: bufsize_comp, lock, buf_list); |
2372 | if (buf_desc) { |
2373 | buf_desc->is_dma_need_sync = 0; |
2374 | SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize); |
2375 | SMC_STAT_BUF_REUSE(smc, is_smcd, is_rmb); |
2376 | break; /* found reusable slot */ |
2377 | } |
2378 | |
2379 | if (is_smcd) |
2380 | buf_desc = smcd_new_buf_create(lgr, is_dmb: is_rmb, bufsize); |
2381 | else |
2382 | buf_desc = smcr_new_buf_create(lgr, is_rmb, bufsize); |
2383 | |
2384 | if (PTR_ERR(ptr: buf_desc) == -ENOMEM) |
2385 | break; |
2386 | if (IS_ERR(ptr: buf_desc)) { |
2387 | if (!is_dgraded) { |
2388 | is_dgraded = true; |
2389 | SMC_STAT_RMB_DOWNGRADED(smc, is_smcd, is_rmb); |
2390 | } |
2391 | continue; |
2392 | } |
2393 | |
2394 | SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb); |
2395 | SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize); |
2396 | buf_desc->used = 1; |
2397 | down_write(sem: lock); |
2398 | list_add(new: &buf_desc->list, head: buf_list); |
2399 | up_write(sem: lock); |
2400 | break; /* found */ |
2401 | } |
2402 | |
2403 | if (IS_ERR(ptr: buf_desc)) |
2404 | return PTR_ERR(ptr: buf_desc); |
2405 | |
2406 | if (!is_smcd) { |
2407 | if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) { |
2408 | smcr_buf_unuse(buf_desc, is_rmb, lgr); |
2409 | return -ENOMEM; |
2410 | } |
2411 | } |
2412 | |
2413 | if (is_rmb) { |
2414 | conn->rmb_desc = buf_desc; |
2415 | conn->rmbe_size_comp = bufsize_comp; |
2416 | smc->sk.sk_rcvbuf = bufsize * 2; |
2417 | atomic_set(v: &conn->bytes_to_rcv, i: 0); |
2418 | conn->rmbe_update_limit = |
2419 | smc_rmb_wnd_update_limit(rmbe_size: buf_desc->len); |
2420 | if (is_smcd) |
2421 | smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ |
2422 | } else { |
2423 | conn->sndbuf_desc = buf_desc; |
2424 | smc->sk.sk_sndbuf = bufsize * 2; |
2425 | atomic_set(v: &conn->sndbuf_space, i: bufsize); |
2426 | } |
2427 | return 0; |
2428 | } |
2429 | |
2430 | void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) |
2431 | { |
2432 | if (!conn->sndbuf_desc->is_dma_need_sync) |
2433 | return; |
2434 | if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd || |
2435 | !smc_link_active(lnk: conn->lnk)) |
2436 | return; |
2437 | smc_ib_sync_sg_for_device(lnk: conn->lnk, buf_slot: conn->sndbuf_desc, data_direction: DMA_TO_DEVICE); |
2438 | } |
2439 | |
2440 | void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) |
2441 | { |
2442 | int i; |
2443 | |
2444 | if (!conn->rmb_desc->is_dma_need_sync) |
2445 | return; |
2446 | if (!smc_conn_lgr_valid(conn) || conn->lgr->is_smcd) |
2447 | return; |
2448 | for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { |
2449 | if (!smc_link_active(lnk: &conn->lgr->lnk[i])) |
2450 | continue; |
2451 | smc_ib_sync_sg_for_cpu(lnk: &conn->lgr->lnk[i], buf_slot: conn->rmb_desc, |
2452 | data_direction: DMA_FROM_DEVICE); |
2453 | } |
2454 | } |
2455 | |
2456 | /* create the send and receive buffer for an SMC socket; |
2457 | * receive buffers are called RMBs; |
2458 | * (even though the SMC protocol allows more than one RMB-element per RMB, |
2459 | * the Linux implementation uses just one RMB-element per RMB, i.e. uses an |
2460 | * extra RMB for every connection in a link group |
2461 | */ |
2462 | int smc_buf_create(struct smc_sock *smc, bool is_smcd) |
2463 | { |
2464 | int rc; |
2465 | |
2466 | /* create send buffer */ |
2467 | rc = __smc_buf_create(smc, is_smcd, is_rmb: false); |
2468 | if (rc) |
2469 | return rc; |
2470 | /* create rmb */ |
2471 | rc = __smc_buf_create(smc, is_smcd, is_rmb: true); |
2472 | if (rc) { |
2473 | down_write(sem: &smc->conn.lgr->sndbufs_lock); |
2474 | list_del(entry: &smc->conn.sndbuf_desc->list); |
2475 | up_write(sem: &smc->conn.lgr->sndbufs_lock); |
2476 | smc_buf_free(lgr: smc->conn.lgr, is_rmb: false, buf_desc: smc->conn.sndbuf_desc); |
2477 | smc->conn.sndbuf_desc = NULL; |
2478 | } |
2479 | return rc; |
2480 | } |
2481 | |
2482 | static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) |
2483 | { |
2484 | int i; |
2485 | |
2486 | for_each_clear_bit(i, lgr->rtokens_used_mask, SMC_RMBS_PER_LGR_MAX) { |
2487 | if (!test_and_set_bit(nr: i, addr: lgr->rtokens_used_mask)) |
2488 | return i; |
2489 | } |
2490 | return -ENOSPC; |
2491 | } |
2492 | |
2493 | static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx, |
2494 | u32 rkey) |
2495 | { |
2496 | int i; |
2497 | |
2498 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { |
2499 | if (test_bit(i, lgr->rtokens_used_mask) && |
2500 | lgr->rtokens[i][lnk_idx].rkey == rkey) |
2501 | return i; |
2502 | } |
2503 | return -ENOENT; |
2504 | } |
2505 | |
2506 | /* set rtoken for a new link to an existing rmb */ |
2507 | void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new, |
2508 | __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey) |
2509 | { |
2510 | int rtok_idx; |
2511 | |
2512 | rtok_idx = smc_rtoken_find_by_link(lgr, lnk_idx: link_idx, ntohl(nw_rkey_known)); |
2513 | if (rtok_idx == -ENOENT) |
2514 | return; |
2515 | lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey); |
2516 | lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr); |
2517 | } |
2518 | |
2519 | /* set rtoken for a new link whose link_id is given */ |
2520 | void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id, |
2521 | __be64 nw_vaddr, __be32 nw_rkey) |
2522 | { |
2523 | u64 dma_addr = be64_to_cpu(nw_vaddr); |
2524 | u32 rkey = ntohl(nw_rkey); |
2525 | bool found = false; |
2526 | int link_idx; |
2527 | |
2528 | for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) { |
2529 | if (lgr->lnk[link_idx].link_id == link_id) { |
2530 | found = true; |
2531 | break; |
2532 | } |
2533 | } |
2534 | if (!found) |
2535 | return; |
2536 | lgr->rtokens[rtok_idx][link_idx].rkey = rkey; |
2537 | lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr; |
2538 | } |
2539 | |
2540 | /* add a new rtoken from peer */ |
2541 | int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey) |
2542 | { |
2543 | struct smc_link_group *lgr = smc_get_lgr(link: lnk); |
2544 | u64 dma_addr = be64_to_cpu(nw_vaddr); |
2545 | u32 rkey = ntohl(nw_rkey); |
2546 | int i; |
2547 | |
2548 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { |
2549 | if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && |
2550 | lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr && |
2551 | test_bit(i, lgr->rtokens_used_mask)) { |
2552 | /* already in list */ |
2553 | return i; |
2554 | } |
2555 | } |
2556 | i = smc_rmb_reserve_rtoken_idx(lgr); |
2557 | if (i < 0) |
2558 | return i; |
2559 | lgr->rtokens[i][lnk->link_idx].rkey = rkey; |
2560 | lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr; |
2561 | return i; |
2562 | } |
2563 | |
2564 | /* delete an rtoken from all links */ |
2565 | int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey) |
2566 | { |
2567 | struct smc_link_group *lgr = smc_get_lgr(link: lnk); |
2568 | u32 rkey = ntohl(nw_rkey); |
2569 | int i, j; |
2570 | |
2571 | for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) { |
2572 | if (lgr->rtokens[i][lnk->link_idx].rkey == rkey && |
2573 | test_bit(i, lgr->rtokens_used_mask)) { |
2574 | for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) { |
2575 | lgr->rtokens[i][j].rkey = 0; |
2576 | lgr->rtokens[i][j].dma_addr = 0; |
2577 | } |
2578 | clear_bit(nr: i, addr: lgr->rtokens_used_mask); |
2579 | return 0; |
2580 | } |
2581 | } |
2582 | return -ENOENT; |
2583 | } |
2584 | |
2585 | /* save rkey and dma_addr received from peer during clc handshake */ |
2586 | int smc_rmb_rtoken_handling(struct smc_connection *conn, |
2587 | struct smc_link *lnk, |
2588 | struct smc_clc_msg_accept_confirm *clc) |
2589 | { |
2590 | conn->rtoken_idx = smc_rtoken_add(lnk, nw_vaddr: clc->r0.rmb_dma_addr, |
2591 | nw_rkey: clc->r0.rmb_rkey); |
2592 | if (conn->rtoken_idx < 0) |
2593 | return conn->rtoken_idx; |
2594 | return 0; |
2595 | } |
2596 | |
2597 | static void smc_core_going_away(void) |
2598 | { |
2599 | struct smc_ib_device *smcibdev; |
2600 | struct smcd_dev *smcd; |
2601 | |
2602 | mutex_lock(&smc_ib_devices.mutex); |
2603 | list_for_each_entry(smcibdev, &smc_ib_devices.list, list) { |
2604 | int i; |
2605 | |
2606 | for (i = 0; i < SMC_MAX_PORTS; i++) |
2607 | set_bit(nr: i, addr: smcibdev->ports_going_away); |
2608 | } |
2609 | mutex_unlock(lock: &smc_ib_devices.mutex); |
2610 | |
2611 | mutex_lock(&smcd_dev_list.mutex); |
2612 | list_for_each_entry(smcd, &smcd_dev_list.list, list) { |
2613 | smcd->going_away = 1; |
2614 | } |
2615 | mutex_unlock(lock: &smcd_dev_list.mutex); |
2616 | } |
2617 | |
2618 | /* Clean up all SMC link groups */ |
2619 | static void smc_lgrs_shutdown(void) |
2620 | { |
2621 | struct smcd_dev *smcd; |
2622 | |
2623 | smc_core_going_away(); |
2624 | |
2625 | smc_smcr_terminate_all(NULL); |
2626 | |
2627 | mutex_lock(&smcd_dev_list.mutex); |
2628 | list_for_each_entry(smcd, &smcd_dev_list.list, list) |
2629 | smc_smcd_terminate_all(smcd); |
2630 | mutex_unlock(lock: &smcd_dev_list.mutex); |
2631 | } |
2632 | |
2633 | static int smc_core_reboot_event(struct notifier_block *this, |
2634 | unsigned long event, void *ptr) |
2635 | { |
2636 | smc_lgrs_shutdown(); |
2637 | smc_ib_unregister_client(); |
2638 | smc_ism_exit(); |
2639 | return 0; |
2640 | } |
2641 | |
2642 | static struct notifier_block smc_reboot_notifier = { |
2643 | .notifier_call = smc_core_reboot_event, |
2644 | }; |
2645 | |
2646 | int __init smc_core_init(void) |
2647 | { |
2648 | return register_reboot_notifier(&smc_reboot_notifier); |
2649 | } |
2650 | |
2651 | /* Called (from smc_exit) when module is removed */ |
2652 | void smc_core_exit(void) |
2653 | { |
2654 | unregister_reboot_notifier(&smc_reboot_notifier); |
2655 | smc_lgrs_shutdown(); |
2656 | } |
2657 | |