1 | /* |
2 | * Copyright 2018 Red Hat Inc. |
3 | * |
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
5 | * copy of this software and associated documentation files (the "Software"), |
6 | * to deal in the Software without restriction, including without limitation |
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
8 | * and/or sell copies of the Software, and to permit persons to whom the |
9 | * Software is furnished to do so, subject to the following conditions: |
10 | * |
11 | * The above copyright notice and this permission notice shall be included in |
12 | * all copies or substantial portions of the Software. |
13 | * |
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
17 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
20 | * OTHER DEALINGS IN THE SOFTWARE. |
21 | */ |
22 | #include "nouveau_svm.h" |
23 | #include "nouveau_drv.h" |
24 | #include "nouveau_chan.h" |
25 | #include "nouveau_dmem.h" |
26 | |
27 | #include <nvif/event.h> |
28 | #include <nvif/object.h> |
29 | #include <nvif/vmm.h> |
30 | |
31 | #include <nvif/class.h> |
32 | #include <nvif/clb069.h> |
33 | #include <nvif/ifc00d.h> |
34 | |
35 | #include <linux/sched/mm.h> |
36 | #include <linux/sort.h> |
37 | #include <linux/hmm.h> |
38 | #include <linux/memremap.h> |
39 | #include <linux/rmap.h> |
40 | |
41 | struct nouveau_svm { |
42 | struct nouveau_drm *drm; |
43 | struct mutex mutex; |
44 | struct list_head inst; |
45 | |
46 | struct nouveau_svm_fault_buffer { |
47 | int id; |
48 | struct nvif_object object; |
49 | u32 entries; |
50 | u32 getaddr; |
51 | u32 putaddr; |
52 | u32 get; |
53 | u32 put; |
54 | struct nvif_event notify; |
55 | struct work_struct work; |
56 | |
57 | struct nouveau_svm_fault { |
58 | u64 inst; |
59 | u64 addr; |
60 | u64 time; |
61 | u32 engine; |
62 | u8 gpc; |
63 | u8 hub; |
64 | u8 access; |
65 | u8 client; |
66 | u8 fault; |
67 | struct nouveau_svmm *svmm; |
68 | } **fault; |
69 | int fault_nr; |
70 | } buffer[]; |
71 | }; |
72 | |
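/* Access types decoded from a fault entry's info word; see
 * nouveau_svm_fault_cache().
 */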
73 | #define FAULT_ACCESS_READ 0 |
74 | #define FAULT_ACCESS_WRITE 1 |
75 | #define FAULT_ACCESS_ATOMIC 2 |
76 | #define FAULT_ACCESS_PREFETCH 3 |
77 | |
78 | #define SVM_DBG(s,f,a...) NV_DEBUG((s)->drm, "svm: "f"\n", ##a) |
79 | #define SVM_ERR(s,f,a...) NV_WARN((s)->drm, "svm: "f"\n", ##a) |
80 | |
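/* Ioctl header, method header and PFNMAP payload kept contiguous so that a
 * single nvif_object_ioctl() call can push page mappings to the GPU VMM.
 */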
81 | struct nouveau_pfnmap_args { |
82 | struct nvif_ioctl_v0 i; |
83 | struct nvif_ioctl_mthd_v0 m; |
84 | struct nvif_vmm_pfnmap_v0 p; |
85 | }; |
86 | |
87 | struct nouveau_ivmm { |
88 | struct nouveau_svmm *svmm; |
89 | u64 inst; |
90 | struct list_head head; |
91 | }; |
92 | |
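/* Find the SVMM linked to a channel instance, if any.  Callers hold
 * svm->mutex while walking the instance list.
 */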
93 | static struct nouveau_ivmm * |
94 | nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst) |
95 | { |
96 | struct nouveau_ivmm *ivmm; |
97 | list_for_each_entry(ivmm, &svm->inst, head) { |
98 | if (ivmm->inst == inst) |
99 | return ivmm; |
100 | } |
101 | return NULL; |
102 | } |
103 | |
104 | #define SVMM_DBG(s,f,a...) \ |
105 | NV_DEBUG((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a) |
106 | #define SVMM_ERR(s,f,a...) \ |
107 | NV_WARN((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a) |
108 | |
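/* DRM_NOUVEAU_SVM_BIND ioctl: validate the request and, for the MIGRATE
 * command, migrate the intersecting VMAs to GPU VRAM on a best-effort basis.
 */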
109 | int |
110 | nouveau_svmm_bind(struct drm_device *dev, void *data, |
111 | struct drm_file *file_priv) |
112 | { |
	struct nouveau_cli *cli = nouveau_cli(file_priv);
114 | struct drm_nouveau_svm_bind *args = data; |
115 | unsigned target, cmd, priority; |
116 | unsigned long addr, end; |
117 | struct mm_struct *mm; |
118 | |
119 | args->va_start &= PAGE_MASK; |
120 | args->va_end = ALIGN(args->va_end, PAGE_SIZE); |
121 | |
122 | /* Sanity check arguments */ |
123 | if (args->reserved0 || args->reserved1) |
124 | return -EINVAL; |
125 | if (args->header & (~NOUVEAU_SVM_BIND_VALID_MASK)) |
126 | return -EINVAL; |
127 | if (args->va_start >= args->va_end) |
128 | return -EINVAL; |
129 | |
130 | cmd = args->header >> NOUVEAU_SVM_BIND_COMMAND_SHIFT; |
131 | cmd &= NOUVEAU_SVM_BIND_COMMAND_MASK; |
132 | switch (cmd) { |
133 | case NOUVEAU_SVM_BIND_COMMAND__MIGRATE: |
134 | break; |
135 | default: |
136 | return -EINVAL; |
137 | } |
138 | |
139 | priority = args->header >> NOUVEAU_SVM_BIND_PRIORITY_SHIFT; |
140 | priority &= NOUVEAU_SVM_BIND_PRIORITY_MASK; |
141 | |
	/* FIXME: support CPU targets, i.e. all target values < GPU_VRAM */
143 | target = args->header >> NOUVEAU_SVM_BIND_TARGET_SHIFT; |
144 | target &= NOUVEAU_SVM_BIND_TARGET_MASK; |
145 | switch (target) { |
146 | case NOUVEAU_SVM_BIND_TARGET__GPU_VRAM: |
147 | break; |
148 | default: |
149 | return -EINVAL; |
150 | } |
151 | |
152 | /* |
	 * FIXME: For now refuse a non-zero stride; we need to change the
	 * migrate kernel function to handle strides to avoid creating a mess
	 * within each device driver.
156 | */ |
157 | if (args->stride) |
158 | return -EINVAL; |
159 | |
160 | /* |
	 * OK, we are asked to do something sane. For now we only support migrate
162 | * commands but we will add things like memory policy (what to do on |
163 | * page fault) and maybe some other commands. |
164 | */ |
165 | |
166 | mm = get_task_mm(current); |
167 | if (!mm) { |
168 | return -EINVAL; |
169 | } |
170 | mmap_read_lock(mm); |
171 | |
172 | if (!cli->svm.svmm) { |
173 | mmap_read_unlock(mm); |
174 | mmput(mm); |
175 | return -EINVAL; |
176 | } |
177 | |
178 | for (addr = args->va_start, end = args->va_end; addr < end;) { |
179 | struct vm_area_struct *vma; |
180 | unsigned long next; |
181 | |
		vma = find_vma_intersection(mm, addr, end);
183 | if (!vma) |
184 | break; |
185 | |
186 | addr = max(addr, vma->vm_start); |
187 | next = min(vma->vm_end, end); |
188 | /* This is a best effort so we ignore errors */ |
		nouveau_dmem_migrate_vma(cli->drm, cli->svm.svmm, vma, addr,
					 next);
191 | addr = next; |
192 | } |
193 | |
194 | /* |
	 * FIXME: Return the number of pages we have migrated; again, we need to
196 | * update the migrate API to return that information so that we can |
197 | * report it to user space. |
198 | */ |
199 | args->result = 0; |
200 | |
201 | mmap_read_unlock(mm); |
202 | mmput(mm); |
203 | |
204 | return 0; |
205 | } |
206 | |
207 | /* Unlink channel instance from SVMM. */ |
208 | void |
209 | nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst) |
210 | { |
211 | struct nouveau_ivmm *ivmm; |
212 | if (svmm) { |
213 | mutex_lock(&svmm->vmm->cli->drm->svm->mutex); |
		ivmm = nouveau_ivmm_find(svmm->vmm->cli->drm->svm, inst);
		if (ivmm) {
			list_del(&ivmm->head);
			kfree(ivmm);
		}
		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
220 | } |
221 | } |
222 | |
223 | /* Link channel instance to SVMM. */ |
224 | int |
225 | nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst) |
226 | { |
227 | struct nouveau_ivmm *ivmm; |
228 | if (svmm) { |
		if (!(ivmm = kmalloc(sizeof(*ivmm), GFP_KERNEL)))
230 | return -ENOMEM; |
231 | ivmm->svmm = svmm; |
232 | ivmm->inst = inst; |
233 | |
234 | mutex_lock(&svmm->vmm->cli->drm->svm->mutex); |
		list_add(&ivmm->head, &svmm->vmm->cli->drm->svm->inst);
		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
237 | } |
238 | return 0; |
239 | } |
240 | |
241 | /* Invalidate SVMM address-range on GPU. */ |
242 | void |
243 | nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit) |
244 | { |
245 | if (limit > start) { |
246 | nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR, |
247 | &(struct nvif_vmm_pfnclr_v0) { |
248 | .addr = start, |
249 | .size = limit - start, |
250 | }, sizeof(struct nvif_vmm_pfnclr_v0)); |
251 | } |
252 | } |
253 | |
254 | static int |
255 | nouveau_svmm_invalidate_range_start(struct mmu_notifier *mn, |
256 | const struct mmu_notifier_range *update) |
257 | { |
258 | struct nouveau_svmm *svmm = |
259 | container_of(mn, struct nouveau_svmm, notifier); |
260 | unsigned long start = update->start; |
261 | unsigned long limit = update->end; |
262 | |
	if (!mmu_notifier_range_blockable(update))
264 | return -EAGAIN; |
265 | |
	SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
267 | |
268 | mutex_lock(&svmm->mutex); |
269 | if (unlikely(!svmm->vmm)) |
270 | goto out; |
271 | |
272 | /* |
273 | * Ignore invalidation callbacks for device private pages since |
274 | * the invalidation is handled as part of the migration process. |
275 | */ |
276 | if (update->event == MMU_NOTIFY_MIGRATE && |
277 | update->owner == svmm->vmm->cli->drm->dev) |
278 | goto out; |
279 | |
280 | if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) { |
281 | if (start < svmm->unmanaged.start) { |
			nouveau_svmm_invalidate(svmm, start,
						svmm->unmanaged.limit);
284 | } |
285 | start = svmm->unmanaged.limit; |
286 | } |
287 | |
288 | nouveau_svmm_invalidate(svmm, start, limit); |
289 | |
290 | out: |
	mutex_unlock(&svmm->mutex);
292 | return 0; |
293 | } |
294 | |
295 | static void nouveau_svmm_free_notifier(struct mmu_notifier *mn) |
296 | { |
297 | kfree(container_of(mn, struct nouveau_svmm, notifier)); |
298 | } |
299 | |
300 | static const struct mmu_notifier_ops nouveau_mn_ops = { |
301 | .invalidate_range_start = nouveau_svmm_invalidate_range_start, |
302 | .free_notifier = nouveau_svmm_free_notifier, |
303 | }; |
304 | |
305 | void |
306 | nouveau_svmm_fini(struct nouveau_svmm **psvmm) |
307 | { |
308 | struct nouveau_svmm *svmm = *psvmm; |
309 | if (svmm) { |
310 | mutex_lock(&svmm->mutex); |
311 | svmm->vmm = NULL; |
		mutex_unlock(&svmm->mutex);
		mmu_notifier_put(&svmm->notifier);
314 | *psvmm = NULL; |
315 | } |
316 | } |
317 | |
318 | int |
319 | nouveau_svmm_init(struct drm_device *dev, void *data, |
320 | struct drm_file *file_priv) |
321 | { |
	struct nouveau_cli *cli = nouveau_cli(file_priv);
323 | struct nouveau_svmm *svmm; |
324 | struct drm_nouveau_svm_init *args = data; |
325 | int ret; |
326 | |
327 | /* We need to fail if svm is disabled */ |
328 | if (!cli->drm->svm) |
329 | return -ENOSYS; |
330 | |
331 | /* Allocate tracking for SVM-enabled VMM. */ |
	if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL)))
333 | return -ENOMEM; |
334 | svmm->vmm = &cli->svm; |
335 | svmm->unmanaged.start = args->unmanaged_addr; |
336 | svmm->unmanaged.limit = args->unmanaged_addr + args->unmanaged_size; |
337 | mutex_init(&svmm->mutex); |
338 | |
339 | /* Check that SVM isn't already enabled for the client. */ |
340 | mutex_lock(&cli->mutex); |
341 | if (cli->svm.cli) { |
342 | ret = -EBUSY; |
343 | goto out_free; |
344 | } |
345 | |
346 | /* Allocate a new GPU VMM that can support SVM (managed by the |
347 | * client, with replayable faults enabled). |
348 | * |
349 | * All future channel/memory allocations will make use of this |
350 | * VMM instead of the standard one. |
351 | */ |
	ret = nvif_vmm_ctor(&cli->mmu, "svmVmm",
353 | cli->vmm.vmm.object.oclass, MANAGED, |
354 | args->unmanaged_addr, args->unmanaged_size, |
355 | &(struct gp100_vmm_v0) { |
356 | .fault_replay = true, |
357 | }, sizeof(struct gp100_vmm_v0), &cli->svm.vmm); |
358 | if (ret) |
359 | goto out_free; |
360 | |
361 | mmap_write_lock(current->mm); |
362 | svmm->notifier.ops = &nouveau_mn_ops; |
	ret = __mmu_notifier_register(&svmm->notifier, current->mm);
364 | if (ret) |
365 | goto out_mm_unlock; |
366 | /* Note, ownership of svmm transfers to mmu_notifier */ |
367 | |
368 | cli->svm.svmm = svmm; |
369 | cli->svm.cli = cli; |
370 | mmap_write_unlock(current->mm); |
	mutex_unlock(&cli->mutex);
372 | return 0; |
373 | |
374 | out_mm_unlock: |
375 | mmap_write_unlock(current->mm); |
376 | out_free: |
	mutex_unlock(&cli->mutex);
	kfree(svmm);
379 | return ret; |
380 | } |
381 | |
382 | /* Issue fault replay for GPU to retry accesses that faulted previously. */ |
383 | static void |
384 | nouveau_svm_fault_replay(struct nouveau_svm *svm) |
385 | { |
	SVM_DBG(svm, "replay");
387 | WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object, |
388 | GP100_VMM_VN_FAULT_REPLAY, |
389 | &(struct gp100_vmm_fault_replay_vn) {}, |
390 | sizeof(struct gp100_vmm_fault_replay_vn))); |
391 | } |
392 | |
393 | /* Cancel a replayable fault that could not be handled. |
394 | * |
395 | * Cancelling the fault will trigger recovery to reset the engine |
396 | * and kill the offending channel (ie. GPU SIGSEGV). |
397 | */ |
398 | static void |
399 | nouveau_svm_fault_cancel(struct nouveau_svm *svm, |
400 | u64 inst, u8 hub, u8 gpc, u8 client) |
401 | { |
	SVM_DBG(svm, "cancel %016llx %d %02x %02x", inst, hub, gpc, client);
403 | WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object, |
404 | GP100_VMM_VN_FAULT_CANCEL, |
405 | &(struct gp100_vmm_fault_cancel_v0) { |
406 | .hub = hub, |
407 | .gpc = gpc, |
408 | .client = client, |
409 | .inst = inst, |
410 | }, sizeof(struct gp100_vmm_fault_cancel_v0))); |
411 | } |
412 | |
413 | static void |
414 | nouveau_svm_fault_cancel_fault(struct nouveau_svm *svm, |
415 | struct nouveau_svm_fault *fault) |
416 | { |
	nouveau_svm_fault_cancel(svm, fault->inst,
				      fault->hub,
				      fault->gpc,
				      fault->client);
421 | } |
422 | |
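/* Rank fault access types for sorting; faults to the same address are
 * ordered prefetch, read, write, atomic.
 */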
423 | static int |
424 | nouveau_svm_fault_priority(u8 fault) |
425 | { |
426 | switch (fault) { |
427 | case FAULT_ACCESS_PREFETCH: |
428 | return 0; |
429 | case FAULT_ACCESS_READ: |
430 | return 1; |
431 | case FAULT_ACCESS_WRITE: |
432 | return 2; |
433 | case FAULT_ACCESS_ATOMIC: |
434 | return 3; |
435 | default: |
436 | WARN_ON_ONCE(1); |
437 | return -1; |
438 | } |
439 | } |
440 | |
441 | static int |
442 | nouveau_svm_fault_cmp(const void *a, const void *b) |
443 | { |
444 | const struct nouveau_svm_fault *fa = *(struct nouveau_svm_fault **)a; |
445 | const struct nouveau_svm_fault *fb = *(struct nouveau_svm_fault **)b; |
446 | int ret; |
447 | if ((ret = (s64)fa->inst - fb->inst)) |
448 | return ret; |
449 | if ((ret = (s64)fa->addr - fb->addr)) |
450 | return ret; |
	return nouveau_svm_fault_priority(fa->access) -
	       nouveau_svm_fault_priority(fb->access);
453 | } |
454 | |
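/* Copy one 32-byte entry from the hardware fault buffer into the software
 * fault cache, clearing the entry's valid bit so the slot can be reused.
 */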
455 | static void |
456 | nouveau_svm_fault_cache(struct nouveau_svm *svm, |
457 | struct nouveau_svm_fault_buffer *buffer, u32 offset) |
458 | { |
459 | struct nvif_object *memory = &buffer->object; |
460 | const u32 instlo = nvif_rd32(memory, offset + 0x00); |
461 | const u32 insthi = nvif_rd32(memory, offset + 0x04); |
462 | const u32 addrlo = nvif_rd32(memory, offset + 0x08); |
463 | const u32 addrhi = nvif_rd32(memory, offset + 0x0c); |
464 | const u32 timelo = nvif_rd32(memory, offset + 0x10); |
465 | const u32 timehi = nvif_rd32(memory, offset + 0x14); |
466 | const u32 engine = nvif_rd32(memory, offset + 0x18); |
467 | const u32 info = nvif_rd32(memory, offset + 0x1c); |
468 | const u64 inst = (u64)insthi << 32 | instlo; |
469 | const u8 gpc = (info & 0x1f000000) >> 24; |
470 | const u8 hub = (info & 0x00100000) >> 20; |
471 | const u8 client = (info & 0x00007f00) >> 8; |
472 | struct nouveau_svm_fault *fault; |
473 | |
	/* XXX: I think we're supposed to spin waiting. */
475 | if (WARN_ON(!(info & 0x80000000))) |
476 | return; |
477 | |
478 | nvif_mask(memory, offset + 0x1c, 0x80000000, 0x00000000); |
479 | |
480 | if (!buffer->fault[buffer->fault_nr]) { |
		fault = kmalloc(sizeof(*fault), GFP_KERNEL);
482 | if (WARN_ON(!fault)) { |
483 | nouveau_svm_fault_cancel(svm, inst, hub, gpc, client); |
484 | return; |
485 | } |
486 | buffer->fault[buffer->fault_nr] = fault; |
487 | } |
488 | |
489 | fault = buffer->fault[buffer->fault_nr++]; |
490 | fault->inst = inst; |
491 | fault->addr = (u64)addrhi << 32 | addrlo; |
492 | fault->time = (u64)timehi << 32 | timelo; |
493 | fault->engine = engine; |
494 | fault->gpc = gpc; |
495 | fault->hub = hub; |
496 | fault->access = (info & 0x000f0000) >> 16; |
497 | fault->client = client; |
498 | fault->fault = (info & 0x0000001f); |
499 | |
	SVM_DBG(svm, "fault %016llx %016llx %02x",
501 | fault->inst, fault->addr, fault->access); |
502 | } |
503 | |
504 | struct svm_notifier { |
505 | struct mmu_interval_notifier notifier; |
506 | struct nouveau_svmm *svmm; |
507 | }; |
508 | |
509 | static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni, |
510 | const struct mmu_notifier_range *range, |
511 | unsigned long cur_seq) |
512 | { |
513 | struct svm_notifier *sn = |
514 | container_of(mni, struct svm_notifier, notifier); |
515 | |
516 | if (range->event == MMU_NOTIFY_EXCLUSIVE && |
517 | range->owner == sn->svmm->vmm->cli->drm->dev) |
518 | return true; |
519 | |
520 | /* |
521 | * serializes the update to mni->invalidate_seq done by caller and |
522 | * prevents invalidation of the PTE from progressing while HW is being |
523 | * programmed. This is very hacky and only works because the normal |
524 | * notifier that does invalidation is always called after the range |
525 | * notifier. |
526 | */ |
527 | if (mmu_notifier_range_blockable(range)) |
528 | mutex_lock(&sn->svmm->mutex); |
	else if (!mutex_trylock(&sn->svmm->mutex))
		return false;
	mmu_interval_set_seq(mni, cur_seq);
	mutex_unlock(&sn->svmm->mutex);
533 | return true; |
534 | } |
535 | |
536 | static const struct mmu_interval_notifier_ops nouveau_svm_mni_ops = { |
537 | .invalidate = nouveau_svm_range_invalidate, |
538 | }; |
539 | |
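/* Translate the hmm_range result for a single page into the NVIF PFNMAP
 * encoding (physical address, VRAM/HOST and write bits) expected by the VMM.
 */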
540 | static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm, |
541 | struct hmm_range *range, |
542 | struct nouveau_pfnmap_args *args) |
543 | { |
544 | struct page *page; |
545 | |
546 | /* |
547 | * The address prepared here is passed through nvif_object_ioctl() |
548 | * to an eventual DMA map in something like gp100_vmm_pgt_pfn() |
549 | * |
550 | * This is all just encoding the internal hmm representation into a |
551 | * different nouveau internal representation. |
552 | */ |
553 | if (!(range->hmm_pfns[0] & HMM_PFN_VALID)) { |
554 | args->p.phys[0] = 0; |
555 | return; |
556 | } |
557 | |
	page = hmm_pfn_to_page(range->hmm_pfns[0]);
559 | /* |
560 | * Only map compound pages to the GPU if the CPU is also mapping the |
561 | * page as a compound page. Otherwise, the PTE protections might not be |
562 | * consistent (e.g., CPU only maps part of a compound page). |
563 | * Note that the underlying page might still be larger than the |
564 | * CPU mapping (e.g., a PUD sized compound page partially mapped with |
565 | * a PMD sized page table entry). |
566 | */ |
	if (hmm_pfn_to_map_order(range->hmm_pfns[0])) {
568 | unsigned long addr = args->p.addr; |
569 | |
		args->p.page = hmm_pfn_to_map_order(range->hmm_pfns[0]) +
				PAGE_SHIFT;
572 | args->p.size = 1UL << args->p.page; |
573 | args->p.addr &= ~(args->p.size - 1); |
574 | page -= (addr - args->p.addr) >> PAGE_SHIFT; |
575 | } |
576 | if (is_device_private_page(page)) |
577 | args->p.phys[0] = nouveau_dmem_page_addr(page) | |
578 | NVIF_VMM_PFNMAP_V0_V | |
579 | NVIF_VMM_PFNMAP_V0_VRAM; |
580 | else |
581 | args->p.phys[0] = page_to_phys(page) | |
582 | NVIF_VMM_PFNMAP_V0_V | |
583 | NVIF_VMM_PFNMAP_V0_HOST; |
584 | if (range->hmm_pfns[0] & HMM_PFN_WRITE) |
585 | args->p.phys[0] |= NVIF_VMM_PFNMAP_V0_W; |
586 | } |
587 | |
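/* Handle an ATOMIC fault: make the CPU page exclusive to this device and map
 * it on the GPU with write and atomic access, retrying until the update is
 * programmed against an unchanged notifier sequence.
 */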
588 | static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm, |
589 | struct nouveau_drm *drm, |
590 | struct nouveau_pfnmap_args *args, u32 size, |
591 | struct svm_notifier *notifier) |
592 | { |
593 | unsigned long timeout = |
594 | jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); |
595 | struct mm_struct *mm = svmm->notifier.mm; |
596 | struct page *page; |
597 | unsigned long start = args->p.addr; |
598 | unsigned long notifier_seq; |
599 | int ret = 0; |
600 | |
	ret = mmu_interval_notifier_insert(&notifier->notifier, mm,
					   args->p.addr, args->p.size,
					   &nouveau_svm_mni_ops);
604 | if (ret) |
605 | return ret; |
606 | |
607 | while (true) { |
608 | if (time_after(jiffies, timeout)) { |
609 | ret = -EBUSY; |
610 | goto out; |
611 | } |
612 | |
		notifier_seq = mmu_interval_read_begin(&notifier->notifier);
614 | mmap_read_lock(mm); |
		ret = make_device_exclusive_range(mm, start, start + PAGE_SIZE,
						  &page, drm->dev);
617 | mmap_read_unlock(mm); |
618 | if (ret <= 0 || !page) { |
619 | ret = -EINVAL; |
620 | goto out; |
621 | } |
622 | |
623 | mutex_lock(&svmm->mutex); |
		if (!mmu_interval_read_retry(&notifier->notifier,
					     notifier_seq))
626 | break; |
		mutex_unlock(&svmm->mutex);
628 | } |
629 | |
630 | /* Map the page on the GPU. */ |
631 | args->p.page = 12; |
632 | args->p.size = PAGE_SIZE; |
633 | args->p.addr = start; |
634 | args->p.phys[0] = page_to_phys(page) | |
635 | NVIF_VMM_PFNMAP_V0_V | |
636 | NVIF_VMM_PFNMAP_V0_W | |
637 | NVIF_VMM_PFNMAP_V0_A | |
638 | NVIF_VMM_PFNMAP_V0_HOST; |
639 | |
640 | ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); |
	mutex_unlock(&svmm->mutex);
642 | |
643 | unlock_page(page); |
644 | put_page(page); |
645 | |
646 | out: |
	mmu_interval_notifier_remove(&notifier->notifier);
648 | return ret; |
649 | } |
650 | |
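/* Fault a single page with hmm_range_fault() and push the resulting PFN to
 * the GPU VMM, retrying until the update is applied against an unchanged
 * notifier sequence.
 */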
651 | static int nouveau_range_fault(struct nouveau_svmm *svmm, |
652 | struct nouveau_drm *drm, |
653 | struct nouveau_pfnmap_args *args, u32 size, |
654 | unsigned long hmm_flags, |
655 | struct svm_notifier *notifier) |
656 | { |
657 | unsigned long timeout = |
658 | jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); |
659 | /* Have HMM fault pages within the fault window to the GPU. */ |
660 | unsigned long hmm_pfns[1]; |
661 | struct hmm_range range = { |
		.notifier = &notifier->notifier,
663 | .default_flags = hmm_flags, |
664 | .hmm_pfns = hmm_pfns, |
665 | .dev_private_owner = drm->dev, |
666 | }; |
667 | struct mm_struct *mm = svmm->notifier.mm; |
668 | int ret; |
669 | |
	ret = mmu_interval_notifier_insert(&notifier->notifier, mm,
					   args->p.addr, args->p.size,
					   &nouveau_svm_mni_ops);
673 | if (ret) |
674 | return ret; |
675 | |
676 | range.start = notifier->notifier.interval_tree.start; |
677 | range.end = notifier->notifier.interval_tree.last + 1; |
678 | |
679 | while (true) { |
680 | if (time_after(jiffies, timeout)) { |
681 | ret = -EBUSY; |
682 | goto out; |
683 | } |
684 | |
		range.notifier_seq = mmu_interval_read_begin(range.notifier);
686 | mmap_read_lock(mm); |
		ret = hmm_range_fault(&range);
688 | mmap_read_unlock(mm); |
689 | if (ret) { |
690 | if (ret == -EBUSY) |
691 | continue; |
692 | goto out; |
693 | } |
694 | |
695 | mutex_lock(&svmm->mutex); |
		if (mmu_interval_read_retry(range.notifier,
					    range.notifier_seq)) {
			mutex_unlock(&svmm->mutex);
699 | continue; |
700 | } |
701 | break; |
702 | } |
703 | |
	nouveau_hmm_convert_pfn(drm, &range, args);
705 | |
706 | ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL); |
	mutex_unlock(&svmm->mutex);
708 | |
709 | out: |
	mmu_interval_notifier_remove(&notifier->notifier);
711 | |
712 | return ret; |
713 | } |
714 | |
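/* Workqueue handler: drain pending entries from the hardware fault buffer,
 * resolve them against the owning SVMMs, then replay the faults that were
 * handled and cancel those that were not.
 */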
715 | static void |
716 | nouveau_svm_fault(struct work_struct *work) |
717 | { |
718 | struct nouveau_svm_fault_buffer *buffer = container_of(work, typeof(*buffer), work); |
719 | struct nouveau_svm *svm = container_of(buffer, typeof(*svm), buffer[buffer->id]); |
720 | struct nvif_object *device = &svm->drm->client.device.object; |
721 | struct nouveau_svmm *svmm; |
722 | struct { |
723 | struct nouveau_pfnmap_args i; |
724 | u64 phys[1]; |
725 | } args; |
726 | unsigned long hmm_flags; |
727 | u64 inst, start, limit; |
728 | int fi, fn; |
729 | int replay = 0, atomic = 0, ret; |
730 | |
731 | /* Parse available fault buffer entries into a cache, and update |
732 | * the GET pointer so HW can reuse the entries. |
733 | */ |
	SVM_DBG(svm, "fault handler");
735 | if (buffer->get == buffer->put) { |
736 | buffer->put = nvif_rd32(device, buffer->putaddr); |
737 | buffer->get = nvif_rd32(device, buffer->getaddr); |
738 | if (buffer->get == buffer->put) |
739 | return; |
740 | } |
741 | buffer->fault_nr = 0; |
742 | |
	SVM_DBG(svm, "get %08x put %08x", buffer->get, buffer->put);
744 | while (buffer->get != buffer->put) { |
		nouveau_svm_fault_cache(svm, buffer, buffer->get * 0x20);
746 | if (++buffer->get == buffer->entries) |
747 | buffer->get = 0; |
748 | } |
749 | nvif_wr32(device, buffer->getaddr, buffer->get); |
	SVM_DBG(svm, "%d fault(s) pending", buffer->fault_nr);
751 | |
752 | /* Sort parsed faults by instance pointer to prevent unnecessary |
753 | * instance to SVMM translations, followed by address and access |
754 | * type to reduce the amount of work when handling the faults. |
755 | */ |
	sort(buffer->fault, buffer->fault_nr, sizeof(*buffer->fault),
	     nouveau_svm_fault_cmp, NULL);
758 | |
759 | /* Lookup SVMM structure for each unique instance pointer. */ |
760 | mutex_lock(&svm->mutex); |
761 | for (fi = 0, svmm = NULL; fi < buffer->fault_nr; fi++) { |
762 | if (!svmm || buffer->fault[fi]->inst != inst) { |
763 | struct nouveau_ivmm *ivmm = |
				nouveau_ivmm_find(svm, buffer->fault[fi]->inst);
765 | svmm = ivmm ? ivmm->svmm : NULL; |
766 | inst = buffer->fault[fi]->inst; |
			SVM_DBG(svm, "inst %016llx -> svm-%p", inst, svmm);
768 | } |
769 | buffer->fault[fi]->svmm = svmm; |
770 | } |
	mutex_unlock(&svm->mutex);
772 | |
773 | /* Process list of faults. */ |
774 | args.i.i.version = 0; |
775 | args.i.i.type = NVIF_IOCTL_V0_MTHD; |
776 | args.i.m.version = 0; |
777 | args.i.m.method = NVIF_VMM_V0_PFNMAP; |
778 | args.i.p.version = 0; |
779 | |
780 | for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) { |
781 | struct svm_notifier notifier; |
782 | struct mm_struct *mm; |
783 | |
784 | /* Cancel any faults from non-SVM channels. */ |
785 | if (!(svmm = buffer->fault[fi]->svmm)) { |
			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
787 | continue; |
788 | } |
		SVMM_DBG(svmm, "addr %016llx", buffer->fault[fi]->addr);
790 | |
791 | /* We try and group handling of faults within a small |
792 | * window into a single update. |
793 | */ |
794 | start = buffer->fault[fi]->addr; |
795 | limit = start + PAGE_SIZE; |
796 | if (start < svmm->unmanaged.limit) |
797 | limit = min_t(u64, limit, svmm->unmanaged.start); |
798 | |
799 | /* |
800 | * Prepare the GPU-side update of all pages within the |
801 | * fault window, determining required pages and access |
802 | * permissions based on pending faults. |
803 | */ |
804 | args.i.p.addr = start; |
805 | args.i.p.page = PAGE_SHIFT; |
806 | args.i.p.size = PAGE_SIZE; |
807 | /* |
808 | * Determine required permissions based on GPU fault |
809 | * access flags. |
810 | */ |
811 | switch (buffer->fault[fi]->access) { |
812 | case 0: /* READ. */ |
813 | hmm_flags = HMM_PFN_REQ_FAULT; |
814 | break; |
815 | case 2: /* ATOMIC. */ |
816 | atomic = true; |
817 | break; |
818 | case 3: /* PREFETCH. */ |
819 | hmm_flags = 0; |
820 | break; |
821 | default: |
822 | hmm_flags = HMM_PFN_REQ_FAULT | HMM_PFN_REQ_WRITE; |
823 | break; |
824 | } |
825 | |
826 | mm = svmm->notifier.mm; |
827 | if (!mmget_not_zero(mm)) { |
			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
829 | continue; |
830 | } |
831 | |
832 | notifier.svmm = svmm; |
833 | if (atomic) |
			ret = nouveau_atomic_range_fault(svmm, svm->drm,
							 &args.i, sizeof(args),
							 &notifier);
837 | else |
			ret = nouveau_range_fault(svmm, svm->drm, &args.i,
						  sizeof(args), hmm_flags,
						  &notifier);
841 | mmput(mm); |
842 | |
843 | limit = args.i.p.addr + args.i.p.size; |
844 | for (fn = fi; ++fn < buffer->fault_nr; ) { |
845 | /* It's okay to skip over duplicate addresses from the |
846 | * same SVMM as faults are ordered by access type such |
847 | * that only the first one needs to be handled. |
848 | * |
849 | * ie. WRITE faults appear first, thus any handling of |
850 | * pending READ faults will already be satisfied. |
851 | * But if a large page is mapped, make sure subsequent |
852 | * fault addresses have sufficient access permission. |
853 | */ |
854 | if (buffer->fault[fn]->svmm != svmm || |
855 | buffer->fault[fn]->addr >= limit || |
856 | (buffer->fault[fi]->access == FAULT_ACCESS_READ && |
857 | !(args.phys[0] & NVIF_VMM_PFNMAP_V0_V)) || |
858 | (buffer->fault[fi]->access != FAULT_ACCESS_READ && |
859 | buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH && |
860 | !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W)) || |
861 | (buffer->fault[fi]->access != FAULT_ACCESS_READ && |
862 | buffer->fault[fi]->access != FAULT_ACCESS_WRITE && |
863 | buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH && |
864 | !(args.phys[0] & NVIF_VMM_PFNMAP_V0_A))) |
865 | break; |
866 | } |
867 | |
868 | /* If handling failed completely, cancel all faults. */ |
869 | if (ret) { |
870 | while (fi < fn) { |
871 | struct nouveau_svm_fault *fault = |
872 | buffer->fault[fi++]; |
873 | |
874 | nouveau_svm_fault_cancel_fault(svm, fault); |
875 | } |
876 | } else |
877 | replay++; |
878 | } |
879 | |
880 | /* Issue fault replay to the GPU. */ |
881 | if (replay) |
882 | nouveau_svm_fault_replay(svm); |
883 | } |
884 | |
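/* Fault buffer notify handler: defer the actual fault processing, which may
 * sleep, to the buffer's workqueue item.
 */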
885 | static int |
886 | nouveau_svm_event(struct nvif_event *event, void *argv, u32 argc) |
887 | { |
888 | struct nouveau_svm_fault_buffer *buffer = container_of(event, typeof(*buffer), notify); |
889 | |
	schedule_work(&buffer->work);
891 | return NVIF_EVENT_KEEP; |
892 | } |
893 | |
894 | static struct nouveau_pfnmap_args * |
895 | nouveau_pfns_to_args(void *pfns) |
896 | { |
897 | return container_of(pfns, struct nouveau_pfnmap_args, p.phys); |
898 | } |
899 | |
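/* Allocate the argument buffer used to push an array of PFNs to the GPU VMM.
 * The returned pointer addresses the PFN array itself; the surrounding
 * ioctl/method header is recovered with nouveau_pfns_to_args().  A typical
 * sequence (illustrative only) is:
 *
 *	pfns = nouveau_pfns_alloc(npages);
 *	... fill pfns[] with NVIF_VMM_PFNMAP_V0_* encodings ...
 *	nouveau_pfns_map(svmm, mm, addr, pfns, npages);
 *	nouveau_pfns_free(pfns);
 */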
900 | u64 * |
901 | nouveau_pfns_alloc(unsigned long npages) |
902 | { |
903 | struct nouveau_pfnmap_args *args; |
904 | |
905 | args = kzalloc(struct_size(args, p.phys, npages), GFP_KERNEL); |
906 | if (!args) |
907 | return NULL; |
908 | |
909 | args->i.type = NVIF_IOCTL_V0_MTHD; |
910 | args->m.method = NVIF_VMM_V0_PFNMAP; |
911 | args->p.page = PAGE_SHIFT; |
912 | |
913 | return args->p.phys; |
914 | } |
915 | |
916 | void |
917 | nouveau_pfns_free(u64 *pfns) |
918 | { |
919 | struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns); |
920 | |
	kfree(args);
922 | } |
923 | |
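/* Push a previously filled PFN array to the GPU VMM for the given CPU
 * address range.  The return value of the ioctl is ignored; the mapping is
 * best-effort.
 */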
924 | void |
925 | nouveau_pfns_map(struct nouveau_svmm *svmm, struct mm_struct *mm, |
926 | unsigned long addr, u64 *pfns, unsigned long npages) |
927 | { |
928 | struct nouveau_pfnmap_args *args = nouveau_pfns_to_args(pfns); |
929 | int ret; |
930 | |
931 | args->p.addr = addr; |
932 | args->p.size = npages << PAGE_SHIFT; |
933 | |
934 | mutex_lock(&svmm->mutex); |
935 | |
936 | ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, |
937 | struct_size(args, p.phys, npages), NULL); |
938 | |
	mutex_unlock(&svmm->mutex);
940 | } |
941 | |
942 | static void |
943 | nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id) |
944 | { |
945 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
946 | |
947 | nvif_event_block(&buffer->notify); |
	flush_work(&buffer->work);
949 | } |
950 | |
951 | static int |
952 | nouveau_svm_fault_buffer_init(struct nouveau_svm *svm, int id) |
953 | { |
954 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
955 | struct nvif_object *device = &svm->drm->client.device.object; |
956 | |
957 | buffer->get = nvif_rd32(device, buffer->getaddr); |
958 | buffer->put = nvif_rd32(device, buffer->putaddr); |
	SVM_DBG(svm, "get %08x put %08x (init)", buffer->get, buffer->put);
960 | |
961 | return nvif_event_allow(&buffer->notify); |
962 | } |
963 | |
964 | static void |
965 | nouveau_svm_fault_buffer_dtor(struct nouveau_svm *svm, int id) |
966 | { |
967 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
968 | int i; |
969 | |
970 | if (!nvif_object_constructed(&buffer->object)) |
971 | return; |
972 | |
973 | nouveau_svm_fault_buffer_fini(svm, id); |
974 | |
975 | if (buffer->fault) { |
		for (i = 0; i < buffer->entries && buffer->fault[i]; i++)
			kfree(buffer->fault[i]);
		kvfree(buffer->fault);
979 | } |
980 | |
981 | nvif_event_dtor(&buffer->notify); |
982 | nvif_object_dtor(&buffer->object); |
983 | } |
984 | |
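/* Construct and map the hardware fault buffer object, wire up its notify
 * event and software fault cache, then enable it via the _init() path.
 */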
985 | static int |
986 | nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id) |
987 | { |
988 | struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id]; |
989 | struct nouveau_drm *drm = svm->drm; |
990 | struct nvif_object *device = &drm->client.device.object; |
991 | struct nvif_clb069_v0 args = {}; |
992 | int ret; |
993 | |
994 | buffer->id = id; |
995 | |
	ret = nvif_object_ctor(device, "svmFaultBuffer", 0, oclass, &args,
997 | sizeof(args), &buffer->object); |
998 | if (ret < 0) { |
		SVM_ERR(svm, "Fault buffer allocation failed: %d", ret);
1000 | return ret; |
1001 | } |
1002 | |
1003 | nvif_object_map(&buffer->object, NULL, 0); |
1004 | buffer->entries = args.entries; |
1005 | buffer->getaddr = args.get; |
1006 | buffer->putaddr = args.put; |
1007 | INIT_WORK(&buffer->work, nouveau_svm_fault); |
1008 | |
	ret = nvif_event_ctor(&buffer->object, "svmFault", id, nouveau_svm_event, true, NULL, 0,
1010 | &buffer->notify); |
1011 | if (ret) |
1012 | return ret; |
1013 | |
	buffer->fault = kvcalloc(sizeof(*buffer->fault), buffer->entries, GFP_KERNEL);
1015 | if (!buffer->fault) |
1016 | return -ENOMEM; |
1017 | |
1018 | return nouveau_svm_fault_buffer_init(svm, id); |
1019 | } |
1020 | |
1021 | void |
1022 | nouveau_svm_resume(struct nouveau_drm *drm) |
1023 | { |
1024 | struct nouveau_svm *svm = drm->svm; |
1025 | if (svm) |
		nouveau_svm_fault_buffer_init(svm, 0);
1027 | } |
1028 | |
1029 | void |
1030 | nouveau_svm_suspend(struct nouveau_drm *drm) |
1031 | { |
1032 | struct nouveau_svm *svm = drm->svm; |
1033 | if (svm) |
		nouveau_svm_fault_buffer_fini(svm, 0);
1035 | } |
1036 | |
1037 | void |
1038 | nouveau_svm_fini(struct nouveau_drm *drm) |
1039 | { |
1040 | struct nouveau_svm *svm = drm->svm; |
1041 | if (svm) { |
		nouveau_svm_fault_buffer_dtor(svm, 0);
		kfree(drm->svm);
1044 | drm->svm = NULL; |
1045 | } |
1046 | } |
1047 | |
1048 | void |
1049 | nouveau_svm_init(struct nouveau_drm *drm) |
1050 | { |
1051 | static const struct nvif_mclass buffers[] = { |
1052 | { VOLTA_FAULT_BUFFER_A, 0 }, |
1053 | { MAXWELL_FAULT_BUFFER_A, 0 }, |
1054 | {} |
1055 | }; |
1056 | struct nouveau_svm *svm; |
1057 | int ret; |
1058 | |
1059 | /* Disable on Volta and newer until channel recovery is fixed, |
1060 | * otherwise clients will have a trivial way to trash the GPU |
1061 | * for everyone. |
1062 | */ |
1063 | if (drm->client.device.info.family > NV_DEVICE_INFO_V0_PASCAL) |
1064 | return; |
1065 | |
1066 | drm->svm = svm = kzalloc(struct_size(drm->svm, buffer, 1), GFP_KERNEL); |
1067 | if (!drm->svm) |
1068 | return; |
1069 | |
1070 | drm->svm->drm = drm; |
1071 | mutex_init(&drm->svm->mutex); |
	INIT_LIST_HEAD(&drm->svm->inst);
1073 | |
1074 | ret = nvif_mclass(&drm->client.device.object, buffers); |
1075 | if (ret < 0) { |
		SVM_DBG(svm, "No supported fault buffer class");
1077 | nouveau_svm_fini(drm); |
1078 | return; |
1079 | } |
1080 | |
	ret = nouveau_svm_fault_buffer_ctor(svm, buffers[ret].oclass, 0);
1082 | if (ret) { |
1083 | nouveau_svm_fini(drm); |
1084 | return; |
1085 | } |
1086 | |
	SVM_DBG(svm, "Initialised");
1088 | } |
1089 | |