// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2018 Mellanox Technologies. All rights reserved.
 */

#include <linux/mlx5/vport.h>
#include "ib_rep.h"
#include "srq.h"

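/*
 * Bind a vport representor to the IB device that was created for the
 * uplink: record the rep in the matching port slot, point the rep's
 * REP_IB private data at the device and attach the rep's netdev to the
 * (1-based) IB port.
 */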
static int
mlx5_ib_set_vport_rep(struct mlx5_core_dev *dev,
		      struct mlx5_eswitch_rep *rep,
		      int vport_index)
{
	struct mlx5_ib_dev *ibdev;
	struct net_device *ndev;

	ibdev = mlx5_eswitch_uplink_get_proto_dev(dev->priv.eswitch, REP_IB);
	if (!ibdev)
		return -EINVAL;

	ibdev->port[vport_index].rep = rep;
	rep->rep_data[REP_IB].priv = ibdev;
	ndev = mlx5_ib_get_rep_netdev(rep->esw, rep->vport);

	return ib_device_set_netdev(&ibdev->ib_dev, ndev, vport_index + 1);
}

static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev);

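/*
 * With a shared FDB the master's IB device also covers the peer
 * eswitches.  Add each peer's vport count to @num_ports; unless the
 * peer is in multi-port eswitch (MPESW) mode, subtract one because a
 * single IB port represents all uplinks.
 */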
static void mlx5_ib_num_ports_update(struct mlx5_core_dev *dev, u32 *num_ports)
{
	struct mlx5_core_dev *peer_dev;
	int i;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		u32 peer_num_ports = mlx5_eswitch_get_total_vports(peer_dev);

		if (mlx5_lag_is_mpesw(peer_dev))
			*num_ports += peer_num_ports;
		else
			/* Only 1 ib port is the representor for all uplinks */
			*num_ports += peer_num_ports - 1;
	}
}

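/*
 * Transfer ownership of @cur_owner's RDMA transport flow tables to
 * @new_owner.  A no-op unless @cur_owner supports both table
 * directions and @new_owner is able to manage the RDMA transport of
 * another eswitch.
 */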
static int mlx5_ib_set_owner_transport(struct mlx5_core_dev *cur_owner,
				       struct mlx5_core_dev *new_owner)
{
	int ret;

	if (!MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_TX(cur_owner, ft_support) ||
	    !MLX5_CAP_FLOWTABLE_RDMA_TRANSPORT_RX(cur_owner, ft_support))
		return 0;

	if (!MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager) ||
	    !MLX5_CAP_ADV_RDMA(new_owner, rdma_transport_manager_other_eswitch))
		return 0;

	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_TX);
	if (ret)
		return ret;

	ret = mlx5_fs_set_root_dev(cur_owner, new_owner,
				   FS_FT_RDMA_TRANSPORT_RX);
	if (ret) {
		mlx5_fs_set_root_dev(cur_owner, cur_owner,
				     FS_FT_RDMA_TRANSPORT_TX);
		return ret;
	}

	return 0;
}

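/*
 * Return the RDMA transport flow tables of every LAG peer to the peer
 * itself, undoing mlx5_ib_take_transport().
 */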
static void mlx5_ib_release_transport(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev;
	int i, ret;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		ret = mlx5_ib_set_owner_transport(peer_dev, peer_dev);
		WARN_ON_ONCE(ret);
	}
}

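/*
 * Make @dev the owner of the RDMA transport flow tables of all its LAG
 * peers, unwinding on the first failure.
 */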
static int mlx5_ib_take_transport(struct mlx5_core_dev *dev)
{
	struct mlx5_core_dev *peer_dev;
	int ret;
	int i;

	mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
		ret = mlx5_ib_set_owner_transport(peer_dev, dev);
		if (ret) {
			mlx5_ib_release_transport(dev);
			return ret;
		}
	}

	return 0;
}

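/*
 * Load callback for an eswitch representor.  Only an uplink rep
 * allocates a full IB device (using the raw Ethernet profile); any
 * other vport rep is wired into the already existing device through
 * mlx5_ib_set_vport_rep().  With a shared FDB the device lives on the
 * LAG master and spans all peers, so a peer rep's port index is offset
 * by the port counts of the lower-indexed devices.
 */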
static int
mlx5_ib_vport_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep)
{
	u32 num_ports = mlx5_eswitch_get_total_vports(dev);
	struct mlx5_core_dev *lag_master = dev;
	const struct mlx5_ib_profile *profile;
	struct mlx5_core_dev *peer_dev;
	struct mlx5_ib_dev *ibdev;
	bool new_uplink = false;
	int vport_index;
	int ret;
	int i;

	vport_index = rep->vport_index;

	if (mlx5_lag_is_shared_fdb(dev)) {
		if (mlx5_lag_is_master(dev)) {
			mlx5_ib_num_ports_update(dev, &num_ports);
		} else {
			if (rep->vport == MLX5_VPORT_UPLINK) {
				if (!mlx5_lag_is_mpesw(dev))
					return 0;
				new_uplink = true;
			}
			mlx5_lag_for_each_peer_mdev(dev, peer_dev, i) {
				u32 peer_n_ports = mlx5_eswitch_get_total_vports(peer_dev);

				if (mlx5_lag_is_master(peer_dev))
					lag_master = peer_dev;
				else if (!mlx5_lag_is_mpesw(dev))
					/* Only 1 ib port is the representor for all uplinks */
					peer_n_ports--;

				if (mlx5_get_dev_index(peer_dev) < mlx5_get_dev_index(dev))
					vport_index += peer_n_ports;
			}
		}
	}

	if (rep->vport == MLX5_VPORT_UPLINK && !new_uplink)
		profile = &raw_eth_profile;
	else
		return mlx5_ib_set_vport_rep(lag_master, rep, vport_index);

	if (mlx5_lag_is_shared_fdb(dev)) {
		ret = mlx5_ib_take_transport(lag_master);
		if (ret)
			return ret;
	}

	ibdev = ib_alloc_device_with_net(mlx5_ib_dev, ib_dev,
					 mlx5_core_net(lag_master));
	if (!ibdev) {
		ret = -ENOMEM;
		goto release_transport;
	}

	ibdev->port = kcalloc(num_ports, sizeof(*ibdev->port),
			      GFP_KERNEL);
	if (!ibdev->port) {
		ret = -ENOMEM;
		goto fail_port;
	}

	ibdev->is_rep = true;
	vport_index = rep->vport_index;
	ibdev->port[vport_index].rep = rep;
	ibdev->mdev = lag_master;
	ibdev->num_ports = num_ports;
	ibdev->ib_dev.phys_port_cnt = num_ports;
	ret = ib_device_set_netdev(&ibdev->ib_dev,
				   mlx5_ib_get_rep_netdev(lag_master->priv.eswitch,
							  rep->vport),
				   vport_index + 1);
	if (ret)
		goto fail_add;

	ret = __mlx5_ib_add(ibdev, profile);
	if (ret)
		goto fail_add;

	rep->rep_data[REP_IB].priv = ibdev;
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_register_peer_vport_reps(lag_master);

	return 0;

fail_add:
	kfree(ibdev->port);
fail_port:
	ib_dealloc_device(&ibdev->ib_dev);
release_transport:
	if (mlx5_lag_is_shared_fdb(lag_master))
		mlx5_ib_release_transport(lag_master);

	return ret;
}

static void *mlx5_ib_rep_to_dev(struct mlx5_eswitch_rep *rep)
{
	return rep->rep_data[REP_IB].priv;
}

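/*
 * Unload callback for an eswitch representor.  Detaches the rep from
 * its IB port.  For the uplink rep that owns the device this also
 * tears the IB device down; with a shared FDB the peer reps are
 * unregistered and the peers' RDMA transport tables released first.
 */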
static void
mlx5_ib_vport_rep_unload(struct mlx5_eswitch_rep *rep)
{
	struct mlx5_core_dev *mdev = mlx5_eswitch_get_core_dev(rep->esw);
	struct mlx5_ib_dev *dev = mlx5_ib_rep_to_dev(rep);
	int vport_index = rep->vport_index;
	struct mlx5_ib_port *port;
	int i;

	if (WARN_ON(!mdev))
		return;

	if (!dev)
		return;

	if (mlx5_lag_is_shared_fdb(mdev) &&
	    !mlx5_lag_is_master(mdev)) {
		if (rep->vport == MLX5_VPORT_UPLINK && !mlx5_lag_is_mpesw(mdev))
			return;
		for (i = 0; i < dev->num_ports; i++) {
			if (dev->port[i].rep == rep)
				break;
		}
		if (WARN_ON(i == dev->num_ports))
			return;
		vport_index = i;
	}

	port = &dev->port[vport_index];

	ib_device_set_netdev(&dev->ib_dev, NULL, vport_index + 1);
	rep->rep_data[REP_IB].priv = NULL;
	port->rep = NULL;

	if (rep->vport == MLX5_VPORT_UPLINK) {
		if (mlx5_lag_is_shared_fdb(mdev) && !mlx5_lag_is_master(mdev))
			return;

		if (mlx5_lag_is_shared_fdb(mdev)) {
			struct mlx5_core_dev *peer_mdev;
			struct mlx5_eswitch *esw;

			mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
				esw = peer_mdev->priv.eswitch;
				mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
			}
			mlx5_ib_release_transport(mdev);
		}
		__mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX);
	}
}

static const struct mlx5_eswitch_rep_ops rep_ops = {
	.load = mlx5_ib_vport_rep_load,
	.unload = mlx5_ib_vport_rep_unload,
	.get_proto_dev = mlx5_ib_rep_to_dev,
};

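/*
 * With a shared FDB the master's IB device serves the peers too, so
 * the same rep ops are registered on every peer eswitch.
 */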
static void mlx5_ib_register_peer_vport_reps(struct mlx5_core_dev *mdev)
{
	struct mlx5_core_dev *peer_mdev;
	struct mlx5_eswitch *esw;
	int i;

	mlx5_lag_for_each_peer_mdev(mdev, peer_mdev, i) {
		esw = peer_mdev->priv.eswitch;
		mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
	}
}

struct net_device *mlx5_ib_get_rep_netdev(struct mlx5_eswitch *esw,
					  u16 vport_num)
{
	return mlx5_eswitch_get_proto_dev(esw, vport_num, REP_ETH);
}

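/*
 * Install the eswitch rule that steers traffic sent on a raw packet SQ
 * to the vport behind IB port @port.  Returns NULL when the device is
 * not a representor, since no rule is needed in that case.
 */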
struct mlx5_flow_handle *create_flow_rule_vport_sq(struct mlx5_ib_dev *dev,
						   struct mlx5_ib_sq *sq,
						   u32 port)
{
	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
	struct mlx5_eswitch_rep *rep;

	if (!dev->is_rep || !port)
		return NULL;

	if (!dev->port[port - 1].rep)
		return ERR_PTR(-EINVAL);

	rep = dev->port[port - 1].rep;

	return mlx5_eswitch_add_send_to_vport_rule(esw, esw, rep, sq->base.mqp.qpn);
}

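/*
 * Auxiliary bus glue: mlx5 core exposes an ".rdma-rep" auxiliary
 * device once the eswitch runs in offloads (switchdev) mode.  Binding
 * registers the IB rep ops on the eswitch; unbinding removes them.
 */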
static int mlx5r_rep_probe(struct auxiliary_device *adev,
			   const struct auxiliary_device_id *id)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	struct mlx5_eswitch *esw;

	esw = mdev->priv.eswitch;
	mlx5_eswitch_register_vport_reps(esw, &rep_ops, REP_IB);
	return 0;
}

static void mlx5r_rep_remove(struct auxiliary_device *adev)
{
	struct mlx5_adev *idev = container_of(adev, struct mlx5_adev, adev);
	struct mlx5_core_dev *mdev = idev->mdev;
	struct mlx5_eswitch *esw;

	esw = mdev->priv.eswitch;
	mlx5_eswitch_unregister_vport_reps(esw, REP_IB);
}

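/* Match the ".rdma-rep" auxiliary device created by mlx5 core. */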
static const struct auxiliary_device_id mlx5r_rep_id_table[] = {
	{ .name = MLX5_ADEV_NAME ".rdma-rep", },
	{},
};

MODULE_DEVICE_TABLE(auxiliary, mlx5r_rep_id_table);

static struct auxiliary_driver mlx5r_rep_driver = {
	.name = "rep",
	.probe = mlx5r_rep_probe,
	.remove = mlx5r_rep_remove,
	.id_table = mlx5r_rep_id_table,
};

int mlx5r_rep_init(void)
{
	return auxiliary_driver_register(&mlx5r_rep_driver);
}

void mlx5r_rep_cleanup(void)
{
	auxiliary_driver_unregister(&mlx5r_rep_driver);
}