1 | /* |
2 | * Copyright 2018 Red Hat Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | */ |
22 | #include "nouveau_svm.h" |
23 | #include "nouveau_drv.h" |
24 | #include "nouveau_chan.h" |
25 | #include "nouveau_dmem.h" |
26 | |
27 | #include <nvif/event.h> |
28 | #include <nvif/object.h> |
29 | #include <nvif/vmm.h> |
30 | |
31 | #include <nvif/class.h> |
32 | #include <nvif/clb069.h> |
33 | #include <nvif/ifc00d.h> |
34 | |
35 | #include <linux/sched/mm.h> |
36 | #include <linux/sort.h> |
37 | #include <linux/hmm.h> |
38 | #include <linux/memremap.h> |
39 | #include <linux/rmap.h> |
40 | |
41 | struct nouveau_svm { |
42 | struct nouveau_drm *drm; |
43 | struct mutex mutex; |
44 | struct list_head inst; |
45 | |
46 | struct nouveau_svm_fault_buffer { |
47 | int id; |
48 | struct nvif_object object; |
49 | u32 entries; |
50 | u32 getaddr; |
51 | u32 putaddr; |
52 | u32 get; |
53 | u32 put; |
54 | struct nvif_event notify; |
55 | struct work_struct work; |
56 | |
57 | struct nouveau_svm_fault { |
58 | u64 inst; |
59 | u64 addr; |
60 | u64 time; |
61 | u32 engine; |
62 | u8 gpc; |
63 | u8 hub; |
64 | u8 access; |
65 | u8 client; |
66 | u8 fault; |
67 | struct nouveau_svmm *svmm; |
68 | } **fault; |
69 | int fault_nr; |
70 | } buffer[]; |
71 | }; |
72 | |
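/* Access types decoded from a fault entry's info word; see
 * nouveau_svm_fault_cache().
 */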
73 | #define FAULT_ACCESS_READ 0 |
74 | #define FAULT_ACCESS_WRITE 1 |
75 | #define FAULT_ACCESS_ATOMIC 2 |
76 | #define FAULT_ACCESS_PREFETCH 3 |
77 | |
78 | #define SVM_DBG(s,f,a...) NV_DEBUG((s)->drm, "svm: "f"\n", ##a) |
79 | #define SVM_ERR(s,f,a...) NV_WARN((s)->drm, "svm: "f"\n", ##a) |
80 | |
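/* Ioctl header, method header and PFNMAP payload kept contiguous so that a
 * single nvif_object_ioctl() call can push page mappings to the GPU VMM.
 */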
81 | struct nouveau_pfnmap_args { |
82 | struct nvif_ioctl_v0 i; |
83 | struct nvif_ioctl_mthd_v0 m; |
84 | struct nvif_vmm_pfnmap_v0 p; |
85 | }; |
86 | |
87 | struct nouveau_ivmm { |
88 | struct nouveau_svmm *svmm; |
89 | u64 inst; |
90 | struct list_head head; |
91 | }; |
92 | |
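/* Find the SVMM linked to a channel instance, if any.  Callers hold
 * svm->mutex while walking the instance list.
 */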
93 | static struct nouveau_ivmm * |
94 | nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst) |
95 | { |
96 | struct nouveau_ivmm *ivmm; |
97 | list_for_each_entry(ivmm, &svm->inst, head) { |
98 | if (ivmm->inst == inst) |
99 | return ivmm; |
100 | } |
101 | return NULL; |
102 | } |
103 | |
104 | #define SVMM_DBG(s,f,a...) \ |
105 | NV_DEBUG((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a) |
106 | #define SVMM_ERR(s,f,a...) \ |
107 | NV_WARN((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a) |
108 | |
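/* DRM_NOUVEAU_SVM_BIND ioctl: validate the request and, for the MIGRATE
 * command, migrate the intersecting VMAs to GPU VRAM on a best-effort basis.
 */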
109 | int |
110 | nouveau_svmm_bind(struct drm_device *dev, void *data, |
111 | struct drm_file *file_priv) |
112 | { |
	struct nouveau_cli *cli = nouveau_cli(file_priv);
114 | struct drm_nouveau_svm_bind *args = data; |
115 | unsigned target, cmd, priority; |
116 | unsigned long addr, end; |
117 | struct mm_struct *mm; |
118 | |
119 | args->va_start &= PAGE_MASK; |
120 | args->va_end = ALIGN(args->va_end, PAGE_SIZE); |
121 | |
122 | /* Sanity check arguments */ |
123 | if (args->reserved0 || args->reserved1) |
124 | return -EINVAL; |
125 | if (args->header & (~NOUVEAU_SVM_BIND_VALID_MASK)) |
126 | return -EINVAL; |
127 | if (args->va_start >= args->va_end) |
128 | return -EINVAL; |
129 | |
130 | cmd = args->header >> NOUVEAU_SVM_BIND_COMMAND_SHIFT; |
131 | cmd &= NOUVEAU_SVM_BIND_COMMAND_MASK; |
132 | switch (cmd) { |
133 | case NOUVEAU_SVM_BIND_COMMAND__MIGRATE: |
134 | break; |
135 | default: |
136 | return -EINVAL; |
137 | } |
138 | |
139 | priority = args->header >> NOUVEAU_SVM_BIND_PRIORITY_SHIFT; |
140 | priority &= NOUVEAU_SVM_BIND_PRIORITY_MASK; |
141 | |
	/* FIXME: support CPU targets, i.e. all target values < GPU_VRAM */
143 | target = args->header >> NOUVEAU_SVM_BIND_TARGET_SHIFT; |
144 | target &= NOUVEAU_SVM_BIND_TARGET_MASK; |
145 | switch (target) { |
146 | case NOUVEAU_SVM_BIND_TARGET__GPU_VRAM: |
147 | break; |
148 | default: |
149 | return -EINVAL; |
150 | } |
151 | |
152 | /* |
	 * FIXME: For now refuse a non-zero stride; we need to change the
	 * migrate kernel function to handle strides to avoid creating a mess
	 * within each device driver.
156 | */ |
157 | if (args->stride) |
158 | return -EINVAL; |
159 | |
160 | /* |
	 * OK, we are asked to do something sane. For now we only support migrate
162 | * commands but we will add things like memory policy (what to do on |
163 | * page fault) and maybe some other commands. |
164 | */ |
165 | |
166 | mm = get_task_mm(current); |
167 | if (!mm) { |
168 | return -EINVAL; |
169 | } |
170 | mmap_read_lock(mm); |
171 | |
172 | if (!cli->svm.svmm) { |
173 | mmap_read_unlock(mm); |
174 | mmput(mm); |
175 | return -EINVAL; |
176 | } |
177 | |
178 | for (addr = args->va_start, end = args->va_end; addr < end;) { |
179 | struct vm_area_struct *vma; |
180 | unsigned long next; |
181 | |
		vma = find_vma_intersection(mm, addr, end);
183 | if (!vma) |
184 | break; |
185 | |
186 | addr = max(addr, vma->vm_start); |
187 | next = min(vma->vm_end, end); |
188 | /* This is a best effort so we ignore errors */ |
		nouveau_dmem_migrate_vma(cli->drm, cli->svm.svmm, vma, addr,
					 next);
191 | addr = next; |
192 | } |
193 | |
194 | /* |
	 * FIXME: Return the number of pages we have migrated; again, we need to
196 | * update the migrate API to return that information so that we can |
197 | * report it to user space. |
198 | */ |
199 | args->result = 0; |
200 | |
201 | mmap_read_unlock(mm); |
202 | mmput(mm); |
203 | |
204 | return 0; |
205 | } |
206 | |
207 | /* Unlink channel instance from SVMM. */ |
208 | void |
209 | nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst) |
210 | { |
211 | struct nouveau_ivmm *ivmm; |
212 | if (svmm) { |
213 | mutex_lock(&svmm->vmm->cli->drm->svm->mutex); |
		ivmm = nouveau_ivmm_find(svmm->vmm->cli->drm->svm, inst);
		if (ivmm) {
			list_del(&ivmm->head);
			kfree(ivmm);
		}
		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
220 | } |
221 | } |
222 | |
223 | /* Link channel instance to SVMM. */ |
224 | int |
225 | nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst) |
226 | { |
227 | struct nouveau_ivmm *ivmm; |
228 | if (svmm) { |
		if (!(ivmm = kmalloc(sizeof(*ivmm), GFP_KERNEL)))
230 | return -ENOMEM; |
231 | ivmm->svmm = svmm; |
232 | ivmm->inst = inst; |
233 | |
234 | mutex_lock(&svmm->vmm->cli->drm->svm->mutex); |
		list_add(&ivmm->head, &svmm->vmm->cli->drm->svm->inst);
		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
237 | } |
238 | return 0; |
239 | } |
240 | |
241 | /* Invalidate SVMM address-range on GPU. */ |
242 | void |
243 | nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit) |
244 | { |
245 | if (limit > start) { |
246 | nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR, |
247 | &(struct nvif_vmm_pfnclr_v0) { |
248 | .addr = start, |
249 | .size = limit - start, |
250 | }, sizeof(struct nvif_vmm_pfnclr_v0)); |
251 | } |
252 | } |
253 | |
254 | static int |
255 | nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn, |
256 | const struct mmu_notifier_range *update) |
257 | { |
258 | struct nouveau_svmm *svmm = |
259 | container_of(mn, struct nouveau_svmm, notifier); |
260 | unsigned long start = update->start; |
261 | unsigned long limit = update->end; |
262 | |
	if (!mmu_notifier_range_blockable(update))
264 | return -EAGAIN; |
265 | |
	SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
267 | |
268 | mutex_lock(&svmm->mutex); |
269 | if (unlikely(!svmm->vmm)) |
270 | goto out; |
271 | |
272 | /* |
273 | * Ignore invalidation callbacks for device private pages since |
274 | * the invalidation is handled as part of the migration process. |
275 | */ |
276 | if (update->event == MMU_NOTIFY_MIGRATE && |
277 | update->owner == svmm->vmm->cli->drm->dev) |
278 | goto out; |
279 | |
280 | if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) { |
281 | if (start < svmm->unmanaged.start) { |
			nouveau_svmm_invalidate(svmm, start,
						svmm->unmanaged.limit);
284 | } |
285 | start = svmm->unmanaged.limit; |
286 | } |
287 | |
288 | nouveau_svmm_invalidate(svmm, start, limit); |
289 | |
290 | out: |
	mutex_unlock(&svmm->mutex);
292 | return 0; |
293 | } |
294 | |
295 | static void nouveau_svmm_free_notifier(struct mmu_notifier *mn) |
296 | { |
297 | kfree(container_of(mn, struct nouveau_svmm, notifier)); |
298 | } |
299 | |
300 | static const struct mmu_notifier_ops nouveau_mn_ops = { |
301 | .invalidate_range_start = nouveau_svmm_invalidate_range_start, |
302 | .free_notifier = nouveau_svmm_free_notifier, |
303 | }; |
304 | |
305 | void |
306 | nouveau_svmm_fini(struct nouveau_svmm **psvmm) |
307 | { |
308 | struct nouveau_svmm *svmm = *psvmm; |
309 | if (svmm) { |
310 | mutex_lock(&svmm->mutex); |
311 | svmm->vmm = NULL; |
		mutex_unlock(&svmm->mutex);
		mmu_notifier_put(&svmm->notifier);
314 | *psvmm = NULL; |
315 | } |
316 | } |
317 | |
318 | int |
319 | nouveau_svmm_init(struct drm_device *dev, void *data, |
320 | struct drm_file *file_priv) |
321 | { |
	struct nouveau_cli *cli = nouveau_cli(file_priv);
323 | struct nouveau_svmm *svmm; |
324 | struct drm_nouveau_svm_init *args = data; |
325 | int ret; |
326 | |
327 | /* We need to fail if svm is disabled */ |
328 | if (!cli->drm->svm) |
329 | return -ENOSYS; |
330 | |
331 | /* Allocate tracking for SVM-enabled VMM. */ |
	if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL)))
333 | return -ENOMEM; |
334 | svmm->vmm = &cli->svm; |
335 | svmm->unmanaged.start = args->unmanaged_addr; |
336 | svmm->unmanaged.limit = args->unmanaged_addr + args->unmanaged_size; |
337 | mutex_init(&svmm->mutex); |
338 | |
339 | /* Check that SVM isn't already enabled for the client. */ |
340 | mutex_lock(&cli->mutex); |
341 | if (cli->svm.cli) { |
342 | ret = -EBUSY; |
343 | goto out_free; |
344 | } |
345 | |
346 | /* Allocate a new GPU VMM that can support SVM (managed by the |
347 | * client, with replayable faults enabled). |
348 | * |
349 | * All future channel/memory allocations will make use of this |
350 | * VMM instead of the standard one. |
351 | */ |
	ret = nvif_vmm_ctor(&cli->mmu, "svmVmm",
353 | cli->vmm.vmm.object.oclass, MANAGED, |
354 | args->unmanaged_addr, args->unmanaged_size, |
355 | &(struct gp100_vmm_v0) { |
356 | .fault_replay = true, |
357 | }, sizeof(struct gp100_vmm_v0), &cli->svm.vmm); |
358 | if (ret) |
359 | goto out_free; |
360 | |
361 | mmap_write_lock(current->mm); |
362 | svmm->notifier.ops = &nouveau_mn_ops; |
	ret = __mmu_notifier_register(&svmm->notifier, current->mm);
364 | if (ret) |
365 | goto out_mm_unlock; |
366 | /* Note, ownership of svmm transfers to mmu_notifier */ |
367 | |
368 | cli->svm.svmm = svmm; |
369 | cli->svm.cli = cli; |
370 | mmap_write_unlock(current->mm); |
	mutex_unlock(&cli->mutex);
372 | return 0; |
373 | |
374 | out_mm_unlock: |
375 | mmap_write_unlock(current->mm); |
376 | out_free: |
	mutex_unlock(&cli->mutex);
	kfree(svmm);
379 | return ret; |
380 | } |
381 | |
382 | /* Issue fault replay for GPU to retry accesses that faulted previously. */ |
383 | static void |
384 | nouveau_svm_fault_replay(struct nouveau_svm *svm) |
385 | { |
	SVM_DBG(svm, "replay");
387 | WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object, |
388 | GP100_VMM_VN_FAULT_REPLAY, |
389 | &(struct gp100_vmm_fault_replay_vn) {}, |
390 | sizeof(struct gp100_vmm_fault_replay_vn))); |
391 | } |
392 | |
393 | /* Cancel a replayable fault that could not be handled. |
394 | * |
395 | * Cancelling the fault will trigger recovery to reset the engine |
396 | * and kill the offending channel (ie. GPU SIGSEGV). |
397 | */ |
398 | static void |
399 | nouveau_svm_fault_cancel(struct nouveau_svm *svm, |
400 | u64 inst, u8 hub, u8 gpc, u8 client) |
401 | { |
	SVM_DBG(svm, "cancel %016llx %d %02x %02x", inst, hub, gpc, client);
403 | WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object, |
404 | GP100_VMM_VN_FAULT_CANCEL, |
405 | &(struct gp100_vmm_fault_cancel_v0) { |
406 | .hub = hub, |
407 | .gpc = gpc, |
408 | .client = client, |
409 | .inst = inst, |
410 | }, sizeof(struct gp100_vmm_fault_cancel_v0))); |
411 | } |
412 | |
413 | static void |
414 | nouveau_svm_fault_cancel_fault(struct nouveau_svm *svm, |
415 | struct nouveau_svm_fault *fault) |
416 | { |
	nouveau_svm_fault_cancel(svm, fault->inst,
				      fault->hub,
				      fault->gpc,
				      fault->client);
421 | } |
422 | |
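/* Rank fault access types for sorting; faults to the same address are
 * ordered prefetch, read, write, atomic.
 */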
423 | static int |
424 | nouveau_svm_fault_priority(u8 fault) |
425 | { |
426 | switch (fault) { |
427 | case FAULT_ACCESS_PREFETCH: |
428 | return 0; |
429 | case FAULT_ACCESS_READ: |
430 | return 1; |
431 | case FAULT_ACCESS_WRITE: |
432 | return 2; |
433 | case FAULT_ACCESS_ATOMIC: |
434 | return 3; |
435 | default: |
436 | WARN_ON_ONCE(1); |
437 | return -1; |
438 | } |
439 | } |
440 | |
441 | static int |
442 | nouveau_svm_fault_cmp(const void *a, const void *b) |
443 | { |
444 | const struct nouveau_svm_fault *fa = *(struct nouveau_svm_fault **)a; |
445 | const struct nouveau_svm_fault *fb = *(struct nouveau_svm_fault **)b; |
446 | int ret; |
447 | if ((ret = (s64)fa->inst - fb->inst)) |
448 | return ret; |
449 | if ((ret = (s64)fa->addr - fb->addr)) |
450 | return ret; |
	return nouveau_svm_fault_priority(fa->access) -
	       nouveau_svm_fault_priority(fb->access);
453 | } |
454 | |
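/* Copy one 32-byte entry from the hardware fault buffer into the software
 * fault cache, clearing the entry's valid bit so the slot can be reused.
 */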
455 | static void |
456 | nouveau_svm_fault_cache(struct nouveau_svm *svm, |
457 | struct nouveau_svm_fault_buffer *buffer, u32 offset) |
458 | { |
459 | struct nvif_object *memory = &buffer->object; |
460 | const u32 instlo = nvif_rd32(memory, offset + 0x00); |
461 | const u32 insthi = nvif_rd32(memory, offset + 0x04); |
462 | const u32 addrlo = nvif_rd32(memory, offset + 0x08); |
463 | const u32 addrhi = nvif_rd32(memory, offset + 0x0c); |
464 | const u32 timelo = nvif_rd32(memory, offset + 0x10); |
465 | const u32 timehi = nvif_rd32(memory, offset + 0x14); |
466 | const u32 engine = nvif_rd32(memory, offset + 0x18); |
467 | const u32 info = nvif_rd32(memory, offset + 0x1c); |
468 | const u64 inst = (u64)insthi << 32 | instlo; |
469 | const u8 gpc = (info & 0x1f000000) >> 24; |
470 | const u8 hub = (info & 0x00100000) >> 20; |
471 | const u8 client = (info & 0x00007f00) >> 8; |
472 | struct nouveau_svm_fault *fault; |
473 | |
	/* XXX: I think we're supposed to spin waiting. */
475 | if (WARN_ON(!(info & 0x80000000))) |
476 | return; |
477 | |
478 | nvif_mask(memory, offset + 0x1c, 0x80000000, 0x00000000); |
479 | |
480 | if (!buffer->fault[buffer->fault_nr]) { |
		fault = kmalloc(sizeof(*fault), GFP_KERNEL);
482 | if (WARN_ON(!fault)) { |
483 | nouveau_svm_fault_cancel(svm, inst, hub, gpc, client); |
484 | return; |
485 | } |
486 | buffer->fault[buffer->fault_nr] = fault; |
487 | } |
488 | |
489 | fault = buffer->fault[buffer->fault_nr++]; |
490 | fault->inst = inst; |
491 | fault->addr = (u64)addrhi << 32 | addrlo; |
492 | fault->time = (u64)timehi << 32 | timelo; |
493 | fault->engine = engine; |
494 | fault->gpc = gpc; |
495 | fault->hub = hub; |
496 | fault->access = (info & 0x000f0000) >> 16; |
497 | fault->client = client; |
498 | fault->fault = (info & 0x0000001f); |
499 | |
	SVM_DBG(svm, "fault %016llx %016llx %02x",
501 | fault->inst, fault->addr, fault->access); |
502 | } |
503 | |
504 | struct svm_notifier { |
505 | struct mmu_interval_notifier notifier; |
506 | struct nouveau_svmm *svmm; |
507 | }; |
508 | |
509 | static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni, |
510 | const struct mmu_notifier_range *range, |
511 | unsigned long cur_seq) |
512 | { |
513 | struct svm_notifier *sn = |
514 | container_of(mni, struct svm_notifier, notifier); |
515 | |
516 | if (range->event == MMU_NOTIFY_EXCLUSIVE && |
517 | range->owner == sn->svmm->vmm->cli->drm->dev) |
518 | return true; |
519 | |
520 | /* |
521 | * serializes the update to mni->invalidate_seq done by caller and |
522 | * prevents invalidation of the PTE from progressing while HW is being |
523 | * programmed. This is very hacky and only works because the normal |
524 | * notifier that does invalidation is always called after the range |
525 | * notifier. |
526 | */ |
527 | if (mmu_notifier_range_blockable(range)) |
528 | mutex_lock(&sn->svmm->mutex); |
	else if (!mutex_trylock(&sn->svmm->mutex))
		return false;
	mmu_interval_set_seq(mni, cur_seq);
	mutex_unlock(&sn->svmm->mutex);
533 | return true; |
534 | } |
535 | |
536 | static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = { |
537 | .invalidate = nouveau_svm_range_invalidate, |
538 | }; |
539 | |
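/* Translate the hmm_range result for a single page into the NVIF PFNMAP
 * encoding (physical address, VRAM/HOST and write bits) expected by the VMM.
 */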
540 | static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm, |
541 | struct hmm_range *range, |
542 | struct nouveau_pfnmap_args *args) |
543 | { |
544 | struct page *page; |
545 | |
546 | /* |
547 | * The address prepared here is passed through nvif_object_ioctl() |
548 | * to an eventual DMA map in something like gp100_vmm_pgt_pfn() |
549 | * |
550 | * This is all just encoding the internal hmm representation into a |
551 | * different nouveau internal representation. |
552 | */ |
553 | if (!(range->hmm_pfns[0] & HMM_PFN_VALID)) { |
554 | args->p.phys[0] = 0; |
555 | return; |
556 | } |
557 | |
	page = hmm_pfn_to_page(range->hmm_pfns[0]);
559 | /* |
560 | * Only map compound pages to the GPU if the CPU is also mapping the |
561 | * page as a compound page. Otherwise, the PTE protections might not be |
562 | * consistent (e.g., CPU only maps part of a compound page). |
563 | * Note that the underlying page might still be larger than the |
564 | * CPU mapping (e.g., a PUD sized compound page partially mapped with |
565 | * a PMD sized page table entry). |
566 | */ |
	if (hmm_pfn_to_map_order(range->hmm_pfns[0])) {
568 | unsigned long addr = args->p.addr; |
569 | |
		args->p.page = hmm_pfn_to_map_order(range->hmm_pfns[0]) +
				PAGE_SHIFT;
572 | args->p.size = 1UL << args->p.page; |
573 | args->p.addr &= ~(args->p.size - 1); |
574 | page -= (addr - args->p.addr) >> PAGE_SHIFT; |
575 | } |
576 | if (is_device_private_page(page)) |
577 | args->p.phys[0] = nouveau_dmem_page_addr(page) | |
578 | NVIF_VMM_PFNMAP_V0_V | |
579 | NVIF_VMM_PFNMAP_V0_VRAM; |
580 | else |
581 | args->p.phys[0] = page_to_phys(page) | |
582 | NVIF_VMM_PFNMAP_V0_V | |
583 | NVIF_VMM_PFNMAP_V0_HOST; |
584 | if (range->hmm_pfns[0] & HMM_PFN_WRITE) |
585 | args->p.phys[0] |= NVIF_VMM_PFNMAP_V0_W; |
586 | } |
587 | |
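/* Handle an ATOMIC fault: make the CPU page exclusive to this device and map
 * it on the GPU with write and atomic access, retrying until the update is
 * programmed against an unchanged notifier sequence.
 */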
588 | static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, |
589 | struct nouveau_drm *drm, |
590 | struct nouveau_pfnmap_args *args, u32 size, |
591 | struct svm_notifier *notifier) |
592 | { |
593 | unsigned long timeout = |
594 | jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); |
595 | struct mm_struct *mm = svmm->notifier.mm; |
596 | struct page *page; |
597 | unsigned long start = args->p.addr; |
598 | unsigned long notifier_seq; |
599 | int ret = 0; |
600 | |
	ret = mmu_interval_notifier_insert(&notifier->notifier, mm,
					   args->p.addr, args->p.size,
					   &nouveau_svm_mni_ops);
604 | if (ret) |
605 | return ret; |
606 | |
607 | while (true) { |
608 | if (time_after(jiffies, timeout)) { |
609 | ret = -EBUSY; |
610 | goto out; |
611 | } |
612 | |
		notifier_seq = mmu_interval_read_begin(&notifier->notifier);
614 | mmap_read_lock(mm); |
		ret = make_device_exclusive_range(mm, start, start + PAGE_SIZE,
						  &page, drm->dev);
617 | mmap_read_unlock(mm); |
618 | if (ret <= 0 || !page) { |
619 | ret = -EINVAL; |
620 | goto out; |
621 | } |
622 | |
623 | mutex_lock(&svmm->mutex); |
		if (!mmu_interval_read_retry(&notifier->notifier,
					     notifier_seq))
626 | break; |
		mutex_unlock(&svmm->mutex);
628 | } |
629 | |
630 | /* Map the page on the GPU. */ |
631 | args->p.page = 12; |
632 | args->p.size = PAGE_SIZE; |
633 | args->p.addr = start; |
634 | args->p.phys[0] = page_to_phys(page) | |
635 | NVIF_VMM_PFNMAP_V0_V | |
636 | NVIF_VMM_PFNMAP_V0_W | |
637 | NVIF_VMM_PFNMAP_V0_A | |
638 | NVIF_VMM_PFNMAP_V0_HOST; |
639 | |
640 | ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); |
	mutex_unlock(&svmm->mutex);
642 | |
643 | unlock_page(page); |
644 | put_page(page); |
645 | |
646 | out: |
	mmu_interval_notifier_remove(&notifier->notifier);
648 | return ret; |
649 | } |
650 | |
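/* Fault a single page with hmm_range_fault() and push the resulting PFN to
 * the GPU VMM, retrying until the update is applied against an unchanged
 * notifier sequence.
 */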
651 | static int nouveau_range_fault(struct nouveau_svmm *svmm, |
652 | struct nouveau_drm *drm, |
653 | struct nouveau_pfnmap_args *args, u32 size, |
654 | unsigned long hmm_flags, |
655 | struct svm_notifier *notifier) |
656 | { |
657 | unsigned long timeout = |
658 | jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); |
659 | /* Have HMM fault pages within the fault window to the GPU. */ |
660 | unsigned long hmm_pfns[1]; |
661 | struct hmm_range range = { |
		.notifier = &notifier->notifier,
663 | .default_flags = hmm_flags, |
664 | .hmm_pfns = hmm_pfns, |
665 | .dev_private_owner = drm->dev, |
666 | }; |
667 | struct mm_struct *mm = svmm->notifier.mm; |
668 | int ret; |
669 | |
	ret = mmu_interval_notifier_insert(&notifier->notifier, mm,
					   args->p.addr, args->p.size,
					   &nouveau_svm_mni_ops);
673 | if (ret) |
674 | return ret; |
675 | |
676 | range.start = notifier->notifier.interval_tree.start; |
677 | range.end = notifier->notifier.interval_tree.last + 1; |
678 | |
679 | while (true) { |
680 | if (time_after(jiffies, timeout)) { |
681 | ret = -EBUSY; |
682 | goto out; |
683 | } |
684 | |
		range.notifier_seq = mmu_interval_read_begin(range.notifier);
686 | mmap_read_lock(mm); |
		ret = hmm_range_fault(&range);
688 | mmap_read_unlock(mm); |
689 | if (ret) { |
690 | if (ret == -EBUSY) |
691 | continue; |
692 | goto out; |
693 | } |
694 | |
695 | mutex_lock(&svmm->mutex); |
		if (mmu_interval_read_retry(range.notifier,
					    range.notifier_seq)) {
			mutex_unlock(&svmm->mutex);
699 | continue; |
700 | } |
701 | break; |
702 | } |
703 | |
	nouveau_hmm_convert_pfn(drm, &range, args);
705 | |
706 | ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); |
	mutex_unlock(&svmm->mutex);
708 | |
709 | out: |
	mmu_interval_notifier_remove(&notifier->notifier);
711 | |
712 | return ret; |
713 | } |
714 | |
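/* Workqueue handler: drain pending entries from the hardware fault buffer,
 * resolve them against the owning SVMMs, then replay the faults that were
 * handled and cancel those that were not.
 */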
715 | static void |
716 | nouveau_svm_fault(struct work_struct *work) |
717 | { |
718 | struct nouveau_svm_fault_buffer *buffer = container_of(work, typeof(*buffer), work); |
719 | struct nouveau_svm *svm = container_of(buffer, typeof(*svm), buffer[buffer->id]); |
720 | struct nvif_object *device = &svm->drm->client.device.object; |
721 | struct nouveau_svmm *svmm; |
722 | struct { |
723 | struct nouveau_pfnmap_args i; |
724 | u64 phys[1]; |
725 | } args; |
726 | unsigned long hmm_flags; |
727 | u64 inst, start, limit; |
728 | int fi, fn; |
729 | int replay = 0, atomic = 0, ret; |
730 | |
731 | /* Parse available fault buffer entries into a cache, and update |
732 | * the GET pointer so HW can reuse the entries. |
733 | */ |
	SVM_DBG(svm, "fault handler");
735 | if (buffer->get == buffer->put) { |
736 | buffer->put = nvif_rd32(device, buffer->putaddr); |
737 | buffer->get = nvif_rd32(device, buffer->getaddr); |
738 | if (buffer->get == buffer->put) |
739 | return; |
740 | } |
741 | buffer->fault_nr = 0; |
742 | |
	SVM_DBG(svm, "get %08x put %08x", buffer->get, buffer->put);
744 | while (buffer->get != buffer->put) { |
		nouveau_svm_fault_cache(svm, buffer, buffer->get * 0x20);
746 | if (++buffer->get == buffer->entries) |
747 | buffer->get = 0; |
748 | } |
749 | nvif_wr32(device, buffer->getaddr, buffer->get); |
	SVM_DBG(svm, "%d fault(s) pending", buffer->fault_nr);
751 | |
752 | /* Sort parsed faults by instance pointer to prevent unnecessary |
753 | * instance to SVMM translations, followed by address and access |
754 | * type to reduce the amount of work when handling the faults. |
755 | */ |
	sort(buffer->fault, buffer->fault_nr, sizeof(*buffer->fault),
	     nouveau_svm_fault_cmp, NULL);
758 | |
759 | /* Lookup SVMM structure for each unique instance pointer. */ |
760 | mutex_lock(&svm->mutex); |
761 | for (fi = 0, svmm = NULL; fi < buffer->fault_nr; fi++) { |
762 | if (!svmm || buffer->fault[fi]->inst != inst) { |
763 | struct nouveau_ivmm *ivmm = |
				nouveau_ivmm_find(svm, buffer->fault[fi]->inst);
765 | svmm = ivmm ? ivmm->svmm : NULL; |
766 | inst = buffer->fault[fi]->inst; |
			SVM_DBG(svm, "inst %016llx -> svm-%p", inst, svmm);
768 | } |
769 | buffer->fault[fi]->svmm = svmm; |
770 | } |
	mutex_unlock(&svm->mutex);
772 | |
773 | /* Process list of faults. */ |
774 | args.i.i.version = 0; |
775 | args.i.i.type = NVIF_IOCTL_V0_MTHD; |
776 | args.i.m.version = 0; |
777 | args.i.m.method = NVIF_VMM_V0_PFNMAP; |
778 | args.i.p.version = 0; |
779 | |
780 | for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) { |
781 | struct svm_notifier notifier; |
782 | struct mm_struct *mm; |
783 | |
784 | /* Cancel any faults from non-SVM channels. */ |
785 | if (!(svmm = buffer->fault[fi]->svmm)) { |
			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
787 | continue; |
788 | } |
		SVMM_DBG(svmm, "addr %016llx", buffer->fault[fi]->addr);
790 | |
791 | /* We try and group handling of faults within a small |
792 | * window into a single update. |
793 | */ |
794 | start = buffer->fault[fi]->addr; |
795 | limit = start + PAGE_SIZE; |
796 | if (start < svmm->unmanaged.limit) |
797 | limit = min_t(u64, limit, svmm->unmanaged.start); |
798 | |
799 | /* |
800 | * Prepare the GPU-side update of all pages within the |
801 | * fault window, determining required pages and access |
802 | * permissions based on pending faults. |
803 | */ |
804 | args.i.p.addr = start; |
805 | args.i.p.page = PAGE_SHIFT; |
806 | args.i.p.size = PAGE_SIZE; |
807 | /* |
808 | * Determine required permissions based on GPU fault |
809 | * access flags. |
810 | */ |
811 | switch (buffer->fault[fi]->access) { |
812 | case 0: /* READ. */ |
813 | hmm_flags = HMM_PFN_REQ_FAULT; |
814 | break; |
815 | case 2: /* ATOMIC. */ |
816 | atomic = true; |
817 | break; |
818 | case 3: /* PREFETCH. */ |
819 | hmm_flags = 0; |
820 | break; |
821 | default: |
822 | hmm_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE; |
823 | break; |
824 | } |
825 | |
826 | mm = svmm->notifier.mm; |
827 | if (!mmget_not_zero(mm)) { |
			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
829 | continue; |
830 | } |
831 | |
832 | notifier.svmm = svmm; |
833 | if (atomic) |
			ret = nouveau_atomic_range_fault(svmm, svm->drm,
							 &args.i, sizeof(args),
							 &notifier);
837 | else |
			ret = nouveau_range_fault(svmm, svm->drm, &args.i,
						  sizeof(args), hmm_flags,
						  &notifier);
841 | mmput(mm); |
842 | |
843 | limit = args.i.p.addr + args.i.p.size; |
844 | for (fn = fi; ++fn < buffer->fault_nr; ) { |
845 | /* It's okay to skip over duplicate addresses from the |
846 | * same SVMM as faults are ordered by access type such |
847 | * that only the first one needs to be handled. |
848 | * |
849 | * ie. WRITE faults appear first, thus any handling of |
850 | * pending READ faults will already be satisfied. |
851 | * But if a large page is mapped, make sure subsequent |
852 | * fault addresses have sufficient access permission. |
853 | */ |
854 | if (buffer->fault[fn]->svmm != svmm || |
855 | buffer->fault[fn]->addr >= limit || |
856 | (buffer->fault[fi]->access == FAULT_ACCESS_READ && |
857 | !(args.phys[0] & NVIF_VMM_PFNMAP_V0_V)) || |
858 | (buffer->fault[fi]->access != FAULT_ACCESS_READ && |
859 | buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH && |
860 | !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W)) || |
861 | (buffer->fault[fi]->access != FAULT_ACCESS_READ && |
862 | buffer->fault[fi]->access != FAULT_ACCESS_WRITE && |
863 | buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH && |
864 | !(args.phys[0] & NVIF_VMM_PFNMAP_V0_A))) |
865 | break; |
866 | } |
867 | |
868 | /* If handling failed completely, cancel all faults. */ |
869 | if (ret) { |
870 | while (fi < fn) { |
871 | struct nouveau_svm_fault *fault = |
872 | buffer->fault[fi++]; |
873 | |
874 | nouveau_svm_fault_cancel_fault(svm, fault); |
875 | } |
876 | } else |
877 | replay++; |
878 | } |
879 | |
880 | /* Issue fault replay to the GPU. */ |
881 | if (replay) |
882 | nouveau_svm_fault_replay(svm); |
883 | } |
884 | |
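/* Fault buffer notify handler: defer the actual fault processing, which may
 * sleep, to the buffer's workqueue item.
 */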
885 | static int |
886 | nouveau_svm_event(struct nvif_event *event, void *argv, u32 argc) |
887 | { |
888 | struct nouveau_svm_fault_buffer *buffer = container_of(event, typeof(*buffer), notify); |
889 | |
	schedule_work(&buffer->work);
891 | return NVIF_EVENT_KEEP; |
892 | } |
893 | |
894 | static struct nouveau_pfnmap_args * |
895 | nouveau_pfns_to_args(void *pfns) |
896 | { |
897 | return container_of(pfns, struct nouveau_pfnmap_args, p.phys); |
898 | } |
899 | |
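/* Allocate the argument buffer used to push an array of PFNs to the GPU VMM.
 * The returned pointer addresses the PFN array itself; the surrounding
 * ioctl/method header is recovered with nouveau_pfns_to_args().  A typical
 * sequence (illustrative only) is:
 *
 *	pfns = nouveau_pfns_alloc(npages);
 *	... fill pfns[] with NVIF_VMM_PFNMAP_V0_* encodings ...
 *	nouveau_pfns_map(svmm, mm, addr, pfns, npages);
 *	nouveau_pfns_free(pfns);
 */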
900 | u64 * |
901 | nouveau_pfns_alloc(unsigned long npages) |
902 | { |
903 | struct nouveau_pfnmap_args *args; |
904 | |
905 | args = kzalloc(struct_size(args, p.phys, npages), GFP_KERNEL); |
906 | if (!args) |
907 | return NULL; |
908 | |
909 | args->i.type = NVIF_IOCTL_V0_MTHD; |
910 | args->m.method = NVIF_VMM_V0_PFNMAP; |
911 | args->p.page = PAGE_SHIFT; |
912 | |
913 | return args->p.phys; |
914 | } |
915 | |
916 | void |
917 | nouveau_pfns_free(u64 *pfns) |
918 | { |
919 | struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns); |
920 | |
	kfree(args);
922 | } |
923 | |
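/* Push a previously filled PFN array to the GPU VMM for the given CPU
 * address range.  The return value of the ioctl is ignored; the mapping is
 * best-effort.
 */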
924 | void |
925 | nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm, |
926 | unsigned long addr, u64 *pfns, unsigned long npages) |
927 | { |
928 | struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns); |
929 | int ret; |
930 | |
931 | args->p.addr = addr; |
932 | args->p.size = npages << PAGE_SHIFT; |
933 | |
934 | mutex_lock(&svmm->mutex); |
935 | |
936 | ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, |
937 | struct_size(args, p.phys, npages), NULL); |
938 | |
	mutex_unlock(&svmm->mutex);
940 | } |
941 | |
942 | static void |
943 | nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id) |
944 | { |
945 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
946 | |
947 | nvif_event_block(&buffer->notify); |
	flush_work(&buffer->work);
949 | } |
950 | |
951 | static int |
952 | nouveau_svm_fault_buffer_init(struct nouveau_svm *svm, int id) |
953 | { |
954 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
955 | struct nvif_object *device = &svm->drm->client.device.object; |
956 | |
957 | buffer->get = nvif_rd32(device, buffer->getaddr); |
958 | buffer->put = nvif_rd32(device, buffer->putaddr); |
	SVM_DBG(svm, "get %08x put %08x (init)", buffer->get, buffer->put);
960 | |
961 | return nvif_event_allow(&buffer->notify); |
962 | } |
963 | |
964 | static void |
965 | nouveau_svm_fault_buffer_dtor(struct nouveau_svm *svm, int id) |
966 | { |
967 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
968 | int i; |
969 | |
970 | if (!nvif_object_constructed(&buffer->object)) |
971 | return; |
972 | |
973 | nouveau_svm_fault_buffer_fini(svm, id); |
974 | |
975 | if (buffer->fault) { |
		for (i = 0; i < buffer->entries && buffer->fault[i]; i++)
			kfree(buffer->fault[i]);
		kvfree(buffer->fault);
979 | } |
980 | |
981 | nvif_event_dtor(&buffer->notify); |
982 | nvif_object_dtor(&buffer->object); |
983 | } |
984 | |
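/* Construct and map the hardware fault buffer object, wire up its notify
 * event and software fault cache, then enable it via the _init() path.
 */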
985 | static int |
986 | nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id) |
987 | { |
988 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
989 | struct nouveau_drm *drm = svm->drm; |
990 | struct nvif_object *device = &drm->client.device.object; |
991 | struct nvif_clb069_v0 args = {}; |
992 | int ret; |
993 | |
994 | buffer->id = id; |
995 | |
	ret = nvif_object_ctor(device, "svmFaultBuffer", 0, oclass, &args,
997 | sizeof(args), &buffer->object); |
998 | if (ret < 0) { |
		SVM_ERR(svm, "Fault buffer allocation failed: %d", ret);
1000 | return ret; |
1001 | } |
1002 | |
1003 | nvif_object_map(&buffer->object, NULL, 0); |
1004 | buffer->entries = args.entries; |
1005 | buffer->getaddr = args.get; |
1006 | buffer->putaddr = args.put; |
1007 | INIT_WORK(&buffer->work, nouveau_svm_fault); |
1008 | |
	ret = nvif_event_ctor(&buffer->object, "svmFault", id, nouveau_svm_event, true, NULL, 0,
1010 | &buffer->notify); |
1011 | if (ret) |
1012 | return ret; |
1013 | |
	buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL);
1015 | if (!buffer->fault) |
1016 | return -ENOMEM; |
1017 | |
1018 | return nouveau_svm_fault_buffer_init(svm, id); |
1019 | } |
1020 | |
1021 | void |
1022 | nouveau_svm_resume(struct nouveau_drm *drm) |
1023 | { |
1024 | struct nouveau_svm *svm = drm->svm; |
1025 | if (svm) |
		nouveau_svm_fault_buffer_init(svm, 0);
1027 | } |
1028 | |
1029 | void |
1030 | nouveau_svm_suspend(struct nouveau_drm *drm) |
1031 | { |
1032 | struct nouveau_svm *svm = drm->svm; |
1033 | if (svm) |
		nouveau_svm_fault_buffer_fini(svm, 0);
1035 | } |
1036 | |
1037 | void |
1038 | nouveau_svm_fini(struct nouveau_drm *drm) |
1039 | { |
1040 | struct nouveau_svm *svm = drm->svm; |
1041 | if (svm) { |
		nouveau_svm_fault_buffer_dtor(svm, 0);
		kfree(drm->svm);
1044 | drm->svm = NULL; |
1045 | } |
1046 | } |
1047 | |
1048 | void |
1049 | nouveau_svm_init(struct nouveau_drm *drm) |
1050 | { |
1051 | static const struct nvif_mclass buffers[] = { |
1052 | { VOLTA_FAULT_BUFFER_A, 0 }, |
1053 | { MAXWELL_FAULT_BUFFER_A, 0 }, |
1054 | {} |
1055 | }; |
1056 | struct nouveau_svm *svm; |
1057 | int ret; |
1058 | |
1059 | /* Disable on Volta and newer until channel recovery is fixed, |
1060 | * otherwise clients will have a trivial way to trash the GPU |
1061 | * for everyone. |
1062 | */ |
1063 | if (drm->client.device.info.family > NV_DEVICE_INFO_V0_PASCAL) |
1064 | return; |
1065 | |
1066 | drm->svm = svm = kzalloc(struct_size(drm->svm, buffer, 1), GFP_KERNEL); |
1067 | if (!drm->svm) |
1068 | return; |
1069 | |
1070 | drm->svm->drm = drm; |
1071 | mutex_init(&drm->svm->mutex); |
	INIT_LIST_HEAD(&drm->svm->inst);
1073 | |
1074 | ret = nvif_mclass(&drm->client.device.object, buffers); |
1075 | if (ret < 0) { |
		SVM_DBG(svm, "No supported fault buffer class");
1077 | nouveau_svm_fini(drm); |
1078 | return; |
1079 | } |
1080 | |
	ret = nouveau_svm_fault_buffer_ctor(svm, buffers[ret].oclass, 0);
1082 | if (ret) { |
1083 | nouveau_svm_fini(drm); |
1084 | return; |
1085 | } |
1086 | |
	SVM_DBG(svm, "Initialised");
1088 | } |
1089 | |