// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
 */

#include <linux/mlx5/vport.h>
#include "ib_rep.h"
#include "srq.h"

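/*
 * Bind an eswitch vport representor to the already-created uplink IB
 * device: record the rep on the matching IB port and attach the rep's
 * netdev to that port (IB core port numbers are 1-based, hence +1).
 */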
static int
mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
		      struct mlx5_eswitch_rep *rep,
		      int vport_index)
{
	struct mlx5_ib_dev *ibdev;
	struct net_device *ndev;

	ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
	if (!ibdev)
		return -EINVAL;

	ibdev->port[vport_index].rep = rep;
	rep->rep_data[REP_IB].priv = ibdev;
	ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);

	return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
}

static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);

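/*
 * With a shared-FDB LAG, one IB device exposes ports for all peers'
 * vports. Grow @num_ports by each peer's vport count; unless the peer
 * runs in MPESW mode, all of its uplinks share a single IB port, so
 * one port is subtracted per peer.
 */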
static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
{
	struct mlx5_core_dev *peer_dev;
	int i;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);

		if (mlx5_lag_is_mpesw(peer_dev))
			*num_ports += peer_num_ports;
		else
			/* Only 1 ib port is the representor for all uplinks */
			*num_ports += peer_num_ports - 1;
	}
}

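/*
 * Move ownership of @cur_owner's RDMA transport flow-table roots to
 * @new_owner. A no-op when the tables are unsupported or @new_owner
 * cannot manage another eswitch's transport tables. If moving the RX
 * root fails, the TX root is handed back to @cur_owner.
 */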
static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
				       struct mlx5_core_dev *new_owner)
{
	int ret;

	if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
	    !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
		return 0;

	if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
	    !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
		return 0;

	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_TX);
	if (ret)
		return ret;

	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_RX);
	if (ret) {
		mlx5_fs_set_root_dev(cur_owner, cur_owner,
				     FS_FT_RDMA_TRANSPORT_TX);
		return ret;
	}

	return 0;
}

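/*
 * Return each LAG peer's RDMA transport flow-table roots to the peer
 * itself, undoing mlx5_ib_take_transport().
 */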
static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev;
	int i, ret;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
		WARN_ON_ONCE(ret);
	}
}

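/*
 * Take ownership of every LAG peer's RDMA transport flow-table roots
 * on behalf of @dev; on any failure, release whatever was taken.
 */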
static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev;
	int ret;
	int i;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		ret = mlx5_ib_set_owner_transport(peer_dev, dev);
		if (ret) {
			mlx5_ib_release_transport(dev);
			return ret;
		}
	}

	return 0;
}

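/*
 * Load callback for an eswitch vport rep. An uplink rep allocates a
 * full IB device using the raw Ethernet profile, sized to cover the
 * vports of all shared-FDB LAG peers; every other rep (including a
 * slave uplink in MPESW mode) is only bound as one port of the LAG
 * master's IB device, with its port index offset past the ports of
 * all lower-indexed peers.
 */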
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
	u32 num_ports = mlx5_eswitch_get_total_vports(dev);
	struct mlx5_core_dev *lag_master = dev;
	const struct mlx5_ib_profile *profile;
	struct mlx5_core_dev *peer_dev;
	struct mlx5_ib_dev *ibdev;
	bool new_uplink = false;
	int vport_index;
	int ret;
	int i;

	vport_index = rep->vport_index;

	if (mlx5_lag_is_shared_fdb(dev)) {
		if (mlx5_lag_is_master(dev)) {
			mlx5_ib_num_ports_update(dev, &num_ports);
		} else {
			if (rep->vport == MLX5_VPORT_UPLINK) {
				if (!mlx5_lag_is_mpesw(dev))
					return 0;
				new_uplink = true;
			}
			mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
				u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);

				if (mlx5_lag_is_master(peer_dev))
					lag_master = peer_dev;
				else if (!mlx5_lag_is_mpesw(dev))
					/* Only 1 ib port is the representor for all uplinks */
					peer_n_ports--;

				if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
					vport_index += peer_n_ports;
			}
		}
	}

	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
		profile = &raw_eth_profile;
	else
		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);

	if (mlx5_lag_is_shared_fdb(dev)) {
		ret = mlx5_ib_take_transport(lag_master);
		if (ret)
			return ret;
	}

	ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
					 mlx5_core_net(lag_master));
	if (!ibdev) {
		ret = -ENOMEM;
		goto release_transport;
	}

	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
			      GFP_KERNEL);
	if (!ibdev->port) {
		ret = -ENOMEM;
		goto fail_port;
	}

	ibdev->is_rep = true;
	vport_index = rep->vport_index;
	ibdev->port[vport_index].rep = rep;
	ibdev->mdev = lag_master;
	ibdev->num_ports = num_ports;
	ibdev->ib_dev.phys_port_cnt = num_ports;
	ret = ib_device_set_netdev(&ibdev->ib_dev,
				   mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
							  rep->vport),
				   vport_index + 1);
	if (ret)
		goto fail_add;

	ret = __mlx5_ib_add(ibdev, profile);
	if (ret)
		goto fail_add;

	rep->rep_data[REP_IB].priv = ibdev;
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_register_peer_vport_reps(lag_master);

	return 0;

fail_add:
	kfree(ibdev->port);
fail_port:
	ib_dealloc_device(&ibdev->ib_dev);
release_transport:
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_release_transport(lag_master);

	return ret;
}

static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
{
	return rep->rep_data[REP_IB].priv;
}

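/*
 * Unload callback for an eswitch vport rep. On a shared-FDB slave the
 * rep's port must be looked up on the master's IB device, since the
 * locally stored vport index is relative to the slave. Detach the
 * netdev and rep from the port; when the uplink rep of the LAG master
 * (or of a standalone device) goes away, unregister the peers' reps,
 * release the transport tables and tear down the whole IB device.
 */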
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
	struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
	int vport_index = rep->vport_index;
	struct mlx5_ib_port *port;
	int i;

	if (WARN_ON(!mdev))
		return;

	if (!dev)
		return;

	if (mlx5_lag_is_shared_fdb(mdev) &&
	    !mlx5_lag_is_master(mdev)) {
		if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
			return;
		for (i = 0; i < dev->num_ports; i++) {
			if (dev->port[i].rep == rep)
				break;
		}
		if (WARN_ON(i == dev->num_ports))
			return;
		vport_index = i;
	}

	port = &dev->port[vport_index];

	ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
	rep->rep_data[REP_IB].priv = NULL;
	port->rep = NULL;

	if (rep->vport == MLX5_VPORT_UPLINK) {
		if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
			return;

		if (mlx5_lag_is_shared_fdb(mdev)) {
			struct mlx5_core_dev *peer_mdev;
			struct mlx5_eswitch *esw;

			mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
				esw = peer_mdev->priv.eswitch;
				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
			}
			mlx5_ib_release_transport(mdev);
		}
		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
	}
}

static const struct mlx5_eswitch_rep_ops rep_ops = {
	.load = mlx5_ib_vport_rep_load,
	.unload = mlx5_ib_vport_rep_unload,
	.get_proto_dev = mlx5_ib_rep_to_dev,
};

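/*
 * Register the IB rep ops on the eswitch of every LAG peer, so the
 * peers' vports are exposed as ports of this (master) IB device.
 */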
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
{
	struct mlx5_core_dev *peer_mdev;
	struct mlx5_eswitch *esw;
	int i;

	mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
		esw = peer_mdev->priv.eswitch;
		mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
	}
}

struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
					  u16 vport_num)
{
	return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
}

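/*
 * For a representor device, add the steering rule that forwards
 * traffic sent on the given SQ to the vport that the 1-based @port
 * represents. Returns NULL when the device is not a rep (no rule is
 * needed) and an ERR_PTR when the port has no rep bound.
 */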
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
						   struct mlx5_ib_sq *sq,
						   u32 port)
{
	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
	struct mlx5_eswitch_rep *rep;

	if (!dev->is_rep || !port)
		return NULL;

	if (!dev->port[port - 1].rep)
		return ERR_PTR(-EINVAL);

	rep = dev->port[port - 1].rep;

	return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep,
						   sq->base.mqp.qpn);
}

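/*
 * Auxiliary driver glue: probing the mlx5 ".rdma-rep" auxiliary device
 * registers the IB rep ops on its eswitch; remove unregisters them.
 */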
static int mlx5r_rep_probe(struct auxiliary_device *adev,
			   const struct auxiliary_device_id *id)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	struct mlx5_eswitch *esw;

	esw = mdev->priv.eswitch;
	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
	return 0;
}

static void mlx5r_rep_remove(struct auxiliary_device *adev)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	struct mlx5_eswitch *esw;

	esw = mdev->priv.eswitch;
	mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
}

static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".rdma-rep", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);

static struct auxiliary_driver mlx5r_rep_driver = {
	.name = "rep",
	.probe = mlx5r_rep_probe,
	.remove = mlx5r_rep_remove,
	.id_table = mlx5r_rep_id_table,
};

int mlx5r_rep_init(void)
{
	return auxiliary_driver_register(&mlx5r_rep_driver);
}

void mlx5r_rep_cleanup(void)
{
	auxiliary_driver_unregister(&mlx5r_rep_driver);
}
