| 1 | // SPDX-License-Identifier: GPL-2.0 |
| 2 | /* |
| 3 | * Copyright (C) 2022-2024, Advanced Micro Devices, Inc. |
| 4 | */ |
| 5 | |
| 6 | #include <drm/drm_device.h> |
| 7 | #include <drm/drm_managed.h> |
| 8 | #include <drm/drm_print.h> |
| 9 | #include <linux/bitops.h> |
| 10 | #include <linux/bitmap.h> |
| 11 | #include <linux/slab.h> |
| 12 | |
| 13 | #include "aie2_solver.h" |
| 14 | |
/*
 * A partition is a contiguous range of columns granted to one or more
 * requests. Partitions live on solver_rgroup.pt_node_list and are
 * refcounted via nshared.
 */
struct partition_node {
	struct list_head list;
	u32 nshared; /* # shared requests */
	u32 start_col; /* start column */
	u32 ncols; /* # columns */
	bool exclusive; /* can not be shared if set */
};
| 22 | |
/*
 * One solver node per outstanding allocation request; lives on
 * solver_rgroup.node_list until xrs_release_resource().
 */
struct solver_node {
	struct list_head list;
	u64 rid; /* Request ID from consumer */

	struct partition_node *pt_node; /* partition the request was placed on */
	void *cb_arg; /* consumer cookie passed back to load/unload actions */
	u32 dpm_level; /* DPM level chosen for this request */
	u32 cols_len; /* number of entries in start_cols[] */
	u32 start_cols[] __counted_by(cols_len); /* candidate start columns */
};
| 33 | |
/* Resource group: tracks all solver/partition nodes and column usage. */
struct solver_rgroup {
	u32 rgid; /* resource group ID */
	u32 nnode; /* number of nodes on node_list */
	u32 npartition_node; /* number of nodes on pt_node_list */

	DECLARE_BITMAP(resbit, XRS_MAX_COL); /* per-column in-use bitmap */
	struct list_head node_list; /* struct solver_node.list */
	struct list_head pt_node_list; /* struct partition_node.list */
};
| 43 | |
/* Top-level solver handle; allocated per device in xrsm_init(). */
struct solver_state {
	struct solver_rgroup rgp; /* the single resource group */
	struct init_config cfg; /* copy of the caller's init config */
	/* NOTE(review): callbacks here appear unused; code below uses
	 * cfg.actions instead — confirm whether this member is needed. */
	struct xrs_action_ops *actions;
};
| 49 | |
| 50 | static u32 calculate_gops(struct aie_qos *rqos) |
| 51 | { |
| 52 | u32 service_rate = 0; |
| 53 | |
| 54 | if (rqos->latency) |
| 55 | service_rate = (1000 / rqos->latency); |
| 56 | |
| 57 | if (rqos->fps > service_rate) |
| 58 | return rqos->fps * rqos->gops; |
| 59 | |
| 60 | return service_rate * rqos->gops; |
| 61 | } |
| 62 | |
| 63 | /* |
| 64 | * qos_meet() - Check the QOS request can be met. |
| 65 | */ |
| 66 | static int qos_meet(struct solver_state *xrs, struct aie_qos *rqos, u32 cgops) |
| 67 | { |
| 68 | u32 request_gops = calculate_gops(rqos) * xrs->cfg.sys_eff_factor; |
| 69 | |
| 70 | if (request_gops <= cgops) |
| 71 | return 0; |
| 72 | |
| 73 | return -EINVAL; |
| 74 | } |
| 75 | |
| 76 | /* |
| 77 | * sanity_check() - Do a basic sanity check on allocation request. |
| 78 | */ |
| 79 | static int sanity_check(struct solver_state *xrs, struct alloc_requests *req) |
| 80 | { |
| 81 | struct cdo_parts *cdop = &req->cdo; |
| 82 | struct aie_qos *rqos = &req->rqos; |
| 83 | u32 cu_clk_freq; |
| 84 | |
| 85 | if (cdop->ncols > xrs->cfg.total_col) |
| 86 | return -EINVAL; |
| 87 | |
| 88 | /* |
| 89 | * We can find at least one CDOs groups that meet the |
| 90 | * GOPs requirement. |
| 91 | */ |
| 92 | cu_clk_freq = xrs->cfg.clk_list.cu_clk_list[xrs->cfg.clk_list.num_levels - 1]; |
| 93 | |
| 94 | if (qos_meet(xrs, rqos, cgops: cdop->qos_cap.opc * cu_clk_freq / 1000)) |
| 95 | return -EINVAL; |
| 96 | |
| 97 | return 0; |
| 98 | } |
| 99 | |
| 100 | static bool is_valid_qos_dpm_params(struct aie_qos *rqos) |
| 101 | { |
| 102 | /* |
| 103 | * gops is retrieved from the xmodel, so it's always set |
| 104 | * fps and latency are the configurable params from the application |
| 105 | */ |
| 106 | if (rqos->gops > 0 && (rqos->fps > 0 || rqos->latency > 0)) |
| 107 | return true; |
| 108 | |
| 109 | return false; |
| 110 | } |
| 111 | |
| 112 | static int set_dpm_level(struct solver_state *xrs, struct alloc_requests *req, u32 *dpm_level) |
| 113 | { |
| 114 | struct solver_rgroup *rgp = &xrs->rgp; |
| 115 | struct cdo_parts *cdop = &req->cdo; |
| 116 | struct aie_qos *rqos = &req->rqos; |
| 117 | u32 freq, max_dpm_level, level; |
| 118 | struct solver_node *node; |
| 119 | |
| 120 | max_dpm_level = xrs->cfg.clk_list.num_levels - 1; |
| 121 | /* If no QoS parameters are passed, set it to the max DPM level */ |
| 122 | if (!is_valid_qos_dpm_params(rqos)) { |
| 123 | level = max_dpm_level; |
| 124 | goto set_dpm; |
| 125 | } |
| 126 | |
| 127 | /* Find one CDO group that meet the GOPs requirement. */ |
| 128 | for (level = 0; level < max_dpm_level; level++) { |
| 129 | freq = xrs->cfg.clk_list.cu_clk_list[level]; |
| 130 | if (!qos_meet(xrs, rqos, cgops: cdop->qos_cap.opc * freq / 1000)) |
| 131 | break; |
| 132 | } |
| 133 | |
| 134 | /* set the dpm level which fits all the sessions */ |
| 135 | list_for_each_entry(node, &rgp->node_list, list) { |
| 136 | if (node->dpm_level > level) |
| 137 | level = node->dpm_level; |
| 138 | } |
| 139 | |
| 140 | set_dpm: |
| 141 | *dpm_level = level; |
| 142 | return xrs->cfg.actions->set_dft_dpm_level(xrs->cfg.ddev, level); |
| 143 | } |
| 144 | |
| 145 | static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid) |
| 146 | { |
| 147 | struct solver_node *node; |
| 148 | |
| 149 | list_for_each_entry(node, &rgp->node_list, list) { |
| 150 | if (node->rid == rid) |
| 151 | return node; |
| 152 | } |
| 153 | |
| 154 | return NULL; |
| 155 | } |
| 156 | |
| 157 | static void remove_partition_node(struct solver_rgroup *rgp, |
| 158 | struct partition_node *pt_node) |
| 159 | { |
| 160 | pt_node->nshared--; |
| 161 | if (pt_node->nshared > 0) |
| 162 | return; |
| 163 | |
| 164 | list_del(entry: &pt_node->list); |
| 165 | rgp->npartition_node--; |
| 166 | |
| 167 | bitmap_clear(map: rgp->resbit, start: pt_node->start_col, nbits: pt_node->ncols); |
| 168 | kfree(objp: pt_node); |
| 169 | } |
| 170 | |
| 171 | static void remove_solver_node(struct solver_rgroup *rgp, |
| 172 | struct solver_node *node) |
| 173 | { |
| 174 | list_del(entry: &node->list); |
| 175 | rgp->nnode--; |
| 176 | |
| 177 | if (node->pt_node) |
| 178 | remove_partition_node(rgp, pt_node: node->pt_node); |
| 179 | |
| 180 | kfree(objp: node); |
| 181 | } |
| 182 | |
| 183 | static int get_free_partition(struct solver_state *xrs, |
| 184 | struct solver_node *snode, |
| 185 | struct alloc_requests *req) |
| 186 | { |
| 187 | struct partition_node *pt_node; |
| 188 | u32 ncols = req->cdo.ncols; |
| 189 | u32 col, i; |
| 190 | |
| 191 | for (i = 0; i < snode->cols_len; i++) { |
| 192 | col = snode->start_cols[i]; |
| 193 | if (find_next_bit(addr: xrs->rgp.resbit, XRS_MAX_COL, offset: col) >= col + ncols) |
| 194 | break; |
| 195 | } |
| 196 | |
| 197 | if (i == snode->cols_len) |
| 198 | return -ENODEV; |
| 199 | |
| 200 | pt_node = kzalloc(sizeof(*pt_node), GFP_KERNEL); |
| 201 | if (!pt_node) |
| 202 | return -ENOMEM; |
| 203 | |
| 204 | pt_node->nshared = 1; |
| 205 | pt_node->start_col = col; |
| 206 | pt_node->ncols = ncols; |
| 207 | |
| 208 | /* |
| 209 | * Always set exclusive to false for now. |
| 210 | */ |
| 211 | pt_node->exclusive = false; |
| 212 | |
| 213 | list_add_tail(new: &pt_node->list, head: &xrs->rgp.pt_node_list); |
| 214 | xrs->rgp.npartition_node++; |
| 215 | bitmap_set(map: xrs->rgp.resbit, start: pt_node->start_col, nbits: pt_node->ncols); |
| 216 | |
| 217 | snode->pt_node = pt_node; |
| 218 | |
| 219 | return 0; |
| 220 | } |
| 221 | |
| 222 | static int allocate_partition(struct solver_state *xrs, |
| 223 | struct solver_node *snode, |
| 224 | struct alloc_requests *req) |
| 225 | { |
| 226 | struct partition_node *pt_node, *rpt_node = NULL; |
| 227 | int idx, ret; |
| 228 | |
| 229 | ret = get_free_partition(xrs, snode, req); |
| 230 | if (!ret) |
| 231 | return ret; |
| 232 | |
| 233 | /* try to get a share-able partition */ |
| 234 | list_for_each_entry(pt_node, &xrs->rgp.pt_node_list, list) { |
| 235 | if (pt_node->exclusive) |
| 236 | continue; |
| 237 | |
| 238 | if (rpt_node && pt_node->nshared >= rpt_node->nshared) |
| 239 | continue; |
| 240 | |
| 241 | for (idx = 0; idx < snode->cols_len; idx++) { |
| 242 | if (snode->start_cols[idx] != pt_node->start_col) |
| 243 | continue; |
| 244 | |
| 245 | if (req->cdo.ncols != pt_node->ncols) |
| 246 | continue; |
| 247 | |
| 248 | rpt_node = pt_node; |
| 249 | break; |
| 250 | } |
| 251 | } |
| 252 | |
| 253 | if (!rpt_node) |
| 254 | return -ENODEV; |
| 255 | |
| 256 | rpt_node->nshared++; |
| 257 | snode->pt_node = rpt_node; |
| 258 | |
| 259 | return 0; |
| 260 | } |
| 261 | |
| 262 | static struct solver_node *create_solver_node(struct solver_state *xrs, |
| 263 | struct alloc_requests *req) |
| 264 | { |
| 265 | struct cdo_parts *cdop = &req->cdo; |
| 266 | struct solver_node *node; |
| 267 | int ret; |
| 268 | |
| 269 | node = kzalloc(struct_size(node, start_cols, cdop->cols_len), GFP_KERNEL); |
| 270 | if (!node) |
| 271 | return ERR_PTR(error: -ENOMEM); |
| 272 | |
| 273 | node->rid = req->rid; |
| 274 | node->cols_len = cdop->cols_len; |
| 275 | memcpy(node->start_cols, cdop->start_cols, cdop->cols_len * sizeof(u32)); |
| 276 | |
| 277 | ret = allocate_partition(xrs, snode: node, req); |
| 278 | if (ret) |
| 279 | goto free_node; |
| 280 | |
| 281 | list_add_tail(new: &node->list, head: &xrs->rgp.node_list); |
| 282 | xrs->rgp.nnode++; |
| 283 | return node; |
| 284 | |
| 285 | free_node: |
| 286 | kfree(objp: node); |
| 287 | return ERR_PTR(error: ret); |
| 288 | } |
| 289 | |
| 290 | static void fill_load_action(struct solver_state *xrs, |
| 291 | struct solver_node *snode, |
| 292 | struct xrs_action_load *action) |
| 293 | { |
| 294 | action->rid = snode->rid; |
| 295 | action->part.start_col = snode->pt_node->start_col; |
| 296 | action->part.ncols = snode->pt_node->ncols; |
| 297 | } |
| 298 | |
| 299 | int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg) |
| 300 | { |
| 301 | struct xrs_action_load load_act; |
| 302 | struct solver_node *snode; |
| 303 | struct solver_state *xrs; |
| 304 | u32 dpm_level; |
| 305 | int ret; |
| 306 | |
| 307 | xrs = (struct solver_state *)hdl; |
| 308 | |
| 309 | ret = sanity_check(xrs, req); |
| 310 | if (ret) { |
| 311 | drm_err(xrs->cfg.ddev, "invalid request" ); |
| 312 | return ret; |
| 313 | } |
| 314 | |
| 315 | if (rg_search_node(rgp: &xrs->rgp, rid: req->rid)) { |
| 316 | drm_err(xrs->cfg.ddev, "rid %lld is in-use" , req->rid); |
| 317 | return -EEXIST; |
| 318 | } |
| 319 | |
| 320 | snode = create_solver_node(xrs, req); |
| 321 | if (IS_ERR(ptr: snode)) |
| 322 | return PTR_ERR(ptr: snode); |
| 323 | |
| 324 | fill_load_action(xrs, snode, action: &load_act); |
| 325 | ret = xrs->cfg.actions->load(cb_arg, &load_act); |
| 326 | if (ret) |
| 327 | goto free_node; |
| 328 | |
| 329 | ret = set_dpm_level(xrs, req, dpm_level: &dpm_level); |
| 330 | if (ret) |
| 331 | goto free_node; |
| 332 | |
| 333 | snode->dpm_level = dpm_level; |
| 334 | snode->cb_arg = cb_arg; |
| 335 | |
| 336 | drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n" , |
| 337 | snode->pt_node->start_col, snode->pt_node->ncols); |
| 338 | |
| 339 | return 0; |
| 340 | |
| 341 | free_node: |
| 342 | remove_solver_node(rgp: &xrs->rgp, node: snode); |
| 343 | |
| 344 | return ret; |
| 345 | } |
| 346 | |
| 347 | int xrs_release_resource(void *hdl, u64 rid) |
| 348 | { |
| 349 | struct solver_state *xrs = hdl; |
| 350 | struct solver_node *node; |
| 351 | |
| 352 | node = rg_search_node(rgp: &xrs->rgp, rid); |
| 353 | if (!node) { |
| 354 | drm_err(xrs->cfg.ddev, "node not exist" ); |
| 355 | return -ENODEV; |
| 356 | } |
| 357 | |
| 358 | xrs->cfg.actions->unload(node->cb_arg); |
| 359 | remove_solver_node(rgp: &xrs->rgp, node); |
| 360 | |
| 361 | return 0; |
| 362 | } |
| 363 | |
| 364 | void *xrsm_init(struct init_config *cfg) |
| 365 | { |
| 366 | struct solver_rgroup *rgp; |
| 367 | struct solver_state *xrs; |
| 368 | |
| 369 | xrs = drmm_kzalloc(dev: cfg->ddev, size: sizeof(*xrs), GFP_KERNEL); |
| 370 | if (!xrs) |
| 371 | return NULL; |
| 372 | |
| 373 | memcpy(&xrs->cfg, cfg, sizeof(*cfg)); |
| 374 | |
| 375 | rgp = &xrs->rgp; |
| 376 | INIT_LIST_HEAD(list: &rgp->node_list); |
| 377 | INIT_LIST_HEAD(list: &rgp->pt_node_list); |
| 378 | |
| 379 | return xrs; |
| 380 | } |
| 381 | |