// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "../habanalabs.h"
#include "../../include/hw_ip/mmu/mmu_general.h"

#include <linux/slab.h>

#define MMU_V1_MAX_HOPS	(MMU_HOP4 + 1)

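/*
 * get_hop_pte_addr() - get the shadow address of the PTE of @virt_addr in a hop.
 * @ctx: context the page tables belong to.
 * @mmu_prop: MMU properties supplying the per-hop shift/mask pairs.
 * @hop_addr_arr: shadow addresses of the hop tables collected so far.
 * @virt_addr: virtual address being translated.
 * @hop_idx: which hop to compute the PTE address for.
 *
 * The hop's mask/shift extract the table index from @virt_addr, and the index
 * is scaled by the PTE size; e.g. with an 8-byte PTE, an index of 3 lands
 * 24 bytes into the hop table.
 */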
static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop,
					u64 *hop_addr_arr, u64 virt_addr, enum mmu_hop_num hop_idx)
{
	u64 mask, shift;

	mask = mmu_prop->hop_masks[hop_idx];
	shift = mmu_prop->hop_shifts[hop_idx];
	return hop_addr_arr[hop_idx] +
			ctx->hdev->asic_prop.mmu_pte_size * ((virt_addr & mask) >> shift);
}

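/*
 * dram_default_mapping_init() - map the whole DRAM range to the default page.
 *
 * Builds one hop1 table, one hop2 table and num_of_hop3 hop3 tables, and
 * points every hop3 PTE at prop->mmu_dram_default_page_addr, so accesses to
 * not-yet-mapped DRAM land in the default page.
 */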
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!prop->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, HOP_PTE_ENTRIES_512);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kcalloc(total_hops, HL_PTE_SIZE, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

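	/*
	 * Array layout: indices 0..num_of_hop3-1 hold the hop3 tables,
	 * index total_hops-2 holds hop2 and index total_hops-1 holds hop1.
	 */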
	hop0_addr = hl_mmu_dr_get_hop0_addr(ctx);

	hop1_addr = hl_mmu_dr_alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = hl_mmu_dr_alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = hl_mmu_dr_alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	hl_mmu_dr_write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	hl_mmu_dr_write_pte(ctx, hop1_addr, pte_val);
	hl_mmu_dr_get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		hl_mmu_dr_write_pte(ctx, hop2_pte_addr, pte_val);
		hl_mmu_dr_get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

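	/* every hop3 PTE maps the default DRAM page as a present, last-hop page */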
	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
			hl_mmu_dr_write_final_pte(ctx, hop3_pte_addr, pte_val);
			hl_mmu_dr_get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hl_mmu_dr_flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		hl_mmu_dr_free_hop(ctx, ctx->dram_default_hops[i]);

	hl_mmu_dr_free_hop(ctx, hop2_addr);
hop2_err:
	hl_mmu_dr_free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}

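/*
 * dram_default_mapping_fini() - tear down the default DRAM mapping.
 *
 * Releases the hop3, hop2 and hop1 tables created by
 * dram_default_mapping_init(), dropping one PTE reference per entry, and
 * clears the hop0 PTE that pointed at hop1.
 */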
static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!prop->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, HOP_PTE_ENTRIES_512);

	hop0_addr = hl_mmu_dr_get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < HOP_PTE_ENTRIES_512 ; j++) {
			hl_mmu_dr_clear_pte(ctx, hop3_pte_addr);
			hl_mmu_dr_put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		hl_mmu_dr_clear_pte(ctx, hop2_pte_addr);
		hl_mmu_dr_put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	hl_mmu_dr_clear_pte(ctx, hop1_addr);
	hl_mmu_dr_put_pte(ctx, hop1_addr);
	hl_mmu_dr_clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	hl_mmu_dr_flush(ctx);
}

/**
 * hl_mmu_v1_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a hash to hold all the page table hops related to this context,
 * and create the DRAM default page mapping if the device requires it.
 * Return: 0 on success, non-zero otherwise.
 */
static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx)
{
	hash_init(ctx->mmu_shadow_hash);
	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_v1_ctx_fini() - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free the DRAM default page mapping hops
 * - Free any pgts which were not freed yet
 */
static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		hl_mmu_dr_free_pgt_node(ctx, pgt_info);
	}
}

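/*
 * hl_mmu_v1_unmap() - unmap a single page from the shadow page tables.
 *
 * Walks hops 0-3 (and hop 4 when the page is not huge), then clears the final
 * PTE. For DRAM with default page mapping enabled, the PTE is re-pointed at
 * the default page instead. Intermediate hops are freed bottom-up once their
 * PTE reference counts drop to zero.
 *
 * Return: 0 on success, -EFAULT or -EINVAL if the address is not properly mapped.
 */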
static int hl_mmu_v1_unmap(struct hl_ctx *ctx,
				u64 virt_addr, bool is_dram_addr)
{
	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge, clear_hop3 = true;
	int hop_idx;

	/* shifts and masks are the same in PMMU and HPMMU, use one of them */
	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	for (hop_idx = MMU_HOP0; hop_idx < MMU_HOP4; hop_idx++) {
		if (hop_idx == MMU_HOP0) {
			hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx);
		} else {
			hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
			if (hop_addr[hop_idx] == ULLONG_MAX)
				goto not_mapped;
		}

		hop_pte_addr[hop_idx] =
			get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);

		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
	}

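	/* the last-hop bit in the hop3 PTE marks a huge page, which has no hop4 */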
	is_huge = curr_pte & mmu_prop->last_mask;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop_idx = MMU_HOP4;
		hop_addr[hop_idx] = hl_mmu_get_next_hop_addr(ctx, curr_pte);
		if (hop_addr[hop_idx] == ULLONG_MAX)
			goto not_mapped;

		hop_pte_addr[hop_idx] =
			get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
				virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
				virt_addr);
			goto not_mapped;
		}

		hop_idx = MMU_HOP3;
		hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[hop_idx], default_pte);
		hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop_addr[MMU_HOP4])
			hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP4]);
		else
			hl_mmu_dr_clear_pte(ctx, hop_pte_addr[MMU_HOP3]);

		if (hop_addr[MMU_HOP4] && !hl_mmu_dr_put_pte(ctx, hop_addr[MMU_HOP4]))
			clear_hop3 = true;

		if (!clear_hop3)
			goto mapped;

		for (hop_idx = MMU_HOP3; hop_idx >= 0; hop_idx--) {
			hl_mmu_dr_clear_pte(ctx, hop_pte_addr[hop_idx]);

			if (hop_idx == MMU_HOP0)
				break;

			if (hl_mmu_dr_put_pte(ctx, hop_addr[hop_idx]))
				goto mapped;
		}
	}

mapped:
	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

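/*
 * hl_mmu_v1_map() - map a single page in the shadow page tables.
 *
 * Walks the hops for @virt_addr, allocating intermediate hop tables as
 * needed, and writes the final PTE for @phys_addr. Newly allocated hops are
 * linked into their parents, and are freed again on any error.
 *
 * Return: 0 on success, negative error code otherwise.
 */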
static int hl_mmu_v1_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
			u32 page_size, bool is_dram_addr)
{
	u64 hop_addr[MMU_V1_MAX_HOPS] = {0}, hop_pte_addr[MMU_V1_MAX_HOPS] = {0}, curr_pte = 0;
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_huge, hop_new[MMU_V1_MAX_HOPS] = {false};
	int num_hops, hop_idx, prev_hop, rc = -ENOMEM;

	/*
	 * This mapping function can map a page or a huge page. For a huge page
	 * there are only 3 hops rather than 4. Currently the DRAM allocation
	 * uses huge pages only but user memory could have been allocated with
	 * one of the two page sizes. Since this is common code for all three
	 * cases, we need this huge page check.
	 */
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (page_size == prop->pmmu_huge.page_size) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	}

	num_hops = is_huge ? (MMU_V1_MAX_HOPS - 1) : MMU_V1_MAX_HOPS;

	for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++) {
		if (hop_idx == MMU_HOP0) {
			hop_addr[hop_idx] = hl_mmu_dr_get_hop0_addr(ctx);
		} else {
			hop_addr[hop_idx] =
				hl_mmu_dr_get_alloc_next_hop_addr(ctx, curr_pte, &hop_new[hop_idx]);
			if (hop_addr[hop_idx] == ULLONG_MAX)
				goto err;
		}

		hop_pte_addr[hop_idx] =
			get_hop_pte_addr(ctx, mmu_prop, hop_addr, virt_addr, hop_idx);
		curr_pte = *(u64 *) (uintptr_t) hop_pte_addr[hop_idx];
	}

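	/*
	 * With default DRAM page mapping, every DRAM PTE already points at the
	 * default page, so the walk may not allocate new hops and the current
	 * PTE must still hold the default value.
	 */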
	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
				virt_addr);
			rc = -EINVAL;
			goto err;
		}

		for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
			if (hop_new[hop_idx]) {
				dev_err(hdev->dev, "DRAM mapping should not allocate more hops\n");
				rc = -EFAULT;
				goto err;
			}
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
			virt_addr);

		for (hop_idx = MMU_HOP0; hop_idx < num_hops; hop_idx++)
			dev_dbg(hdev->dev, "hop%d pte: 0x%llx (0x%llx)\n", hop_idx,
				*(u64 *) (uintptr_t) hop_pte_addr[hop_idx],
				hop_pte_addr[hop_idx]);

		rc = -EINVAL;
		goto err;
	}

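	/* build the final PTE: page physical address, last-hop flag and present bit */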
	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | mmu_prop->last_mask
			| PAGE_PRESENT_MASK;

	hl_mmu_dr_write_final_pte(ctx, hop_pte_addr[num_hops - 1], curr_pte);

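	/* link newly allocated hops into their parent tables and take refcounts */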
	for (hop_idx = MMU_HOP1; hop_idx < num_hops; hop_idx++) {
		prev_hop = hop_idx - 1;

		if (hop_new[hop_idx]) {
			curr_pte = (hop_addr[hop_idx] & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
			hl_mmu_dr_write_pte(ctx, hop_pte_addr[prev_hop], curr_pte);
			if (hop_idx != MMU_HOP1)
				hl_mmu_dr_get_pte(ctx, hop_addr[prev_hop]);
		}
	}

	hl_mmu_dr_get_pte(ctx, hop_addr[num_hops - 1]);

	return 0;

err:
	/* free any hop tables allocated in this call; hop0 is never allocated here */
	for (hop_idx = num_hops - 1; hop_idx > MMU_HOP0; hop_idx--) {
		if (hop_new[hop_idx])
			hl_mmu_dr_free_hop(ctx, hop_addr[hop_idx]);
	}

	return rc;
}

/*
 * hl_mmu_v1_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_v1_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
static void hl_mmu_v1_swap_in(struct hl_ctx *ctx)
{

}

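/*
 * hl_mmu_v1_get_tlb_info() - walk the page tables for @virt_addr.
 *
 * Classifies @virt_addr as a DRAM, PMMU or PMMU-huge address, then walks the
 * hops through the ASIC's read_pte(), recording each hop's table address, PTE
 * address and PTE value in @hops until a last-hop PTE is found.
 *
 * Return: 0 on success, -EINVAL for an address outside all MMU ranges,
 * -EFAULT if the walk ends without a valid last-hop PTE.
 */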
static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
				struct hl_mmu_hop_info *hops)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	bool is_dram_addr, is_pmmu_addr, is_pmmu_h_addr, is_huge;
	int i, used_hops;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
						prop->dmmu.start_addr,
						prop->dmmu.end_addr);
	is_pmmu_addr = hl_mem_area_inside_range(virt_addr, prop->pmmu.page_size,
						prop->pmmu.start_addr,
						prop->pmmu.end_addr);
	is_pmmu_h_addr = hl_mem_area_inside_range(virt_addr,
						prop->pmmu_huge.page_size,
						prop->pmmu_huge.start_addr,
						prop->pmmu_huge.end_addr);
	if (is_dram_addr) {
		mmu_prop = &prop->dmmu;
		is_huge = true;
	} else if (is_pmmu_addr) {
		mmu_prop = &prop->pmmu;
		is_huge = false;
	} else if (is_pmmu_h_addr) {
		mmu_prop = &prop->pmmu_huge;
		is_huge = true;
	} else {
		return -EINVAL;
	}

	used_hops = mmu_prop->num_hops;

	/* huge pages use fewer hops */
	if (is_huge)
		used_hops--;

	hops->hop_info[0].hop_addr = hl_mmu_dr_get_phys_hop0_addr(ctx);
	hops->hop_info[0].hop_pte_addr =
			hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, 0,
						hops->hop_info[0].hop_addr, virt_addr);
	hops->hop_info[0].hop_pte_val =
			hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[0].hop_pte_addr);

	for (i = 1 ; i < used_hops ; i++) {
		hops->hop_info[i].hop_addr =
			hl_mmu_get_next_hop_addr(ctx,
					hops->hop_info[i - 1].hop_pte_val);
		if (hops->hop_info[i].hop_addr == ULLONG_MAX)
			return -EFAULT;

		hops->hop_info[i].hop_pte_addr =
				hl_mmu_get_hop_pte_phys_addr(ctx, mmu_prop, i,
						hops->hop_info[i].hop_addr,
						virt_addr);
		hops->hop_info[i].hop_pte_val =
				hdev->asic_funcs->read_pte(hdev,
						hops->hop_info[i].hop_pte_addr);

		if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
			return -EFAULT;

		if (hops->hop_info[i].hop_pte_val & mmu_prop->last_mask)
			break;
	}

	/* if the walk passed over all used hops then no last hop was found */
	if (i == used_hops)
		return -EFAULT;

	if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK))
		return -EFAULT;

	hops->used_hops = i + 1;

	return 0;
}

/*
 * hl_mmu_v1_set_funcs() - set the MMU function pointers for MMU v1
 *
 * @hdev: pointer to the device structure
 * @mmu: pointer to the MMU function table to fill
 */
void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu)
{
	mmu->init = hl_mmu_dr_init;
	mmu->fini = hl_mmu_dr_fini;
	mmu->ctx_init = hl_mmu_v1_ctx_init;
	mmu->ctx_fini = hl_mmu_v1_ctx_fini;
	mmu->map = hl_mmu_v1_map;
	mmu->unmap = hl_mmu_v1_unmap;
	mmu->flush = hl_mmu_dr_flush;
	mmu->swap_out = hl_mmu_v1_swap_out;
	mmu->swap_in = hl_mmu_v1_swap_in;
	mmu->get_tlb_info = hl_mmu_v1_get_tlb_info;
}