1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2020 Intel Corporation |
4 | */ |
5 | |
6 | #include <asm/set_memory.h> |
7 | #include <asm/smp.h> |
8 | #include <linux/types.h> |
9 | #include <linux/stop_machine.h> |
10 | |
11 | #include <drm/drm_managed.h> |
12 | #include <drm/i915_drm.h> |
13 | #include <drm/intel-gtt.h> |
14 | |
15 | #include "display/intel_display.h" |
16 | #include "gem/i915_gem_lmem.h" |
17 | |
18 | #include "intel_context.h" |
19 | #include "intel_ggtt_gmch.h" |
20 | #include "intel_gpu_commands.h" |
21 | #include "intel_gt.h" |
22 | #include "intel_gt_regs.h" |
23 | #include "intel_pci_config.h" |
24 | #include "intel_ring.h" |
25 | #include "i915_drv.h" |
26 | #include "i915_pci.h" |
27 | #include "i915_reg.h" |
28 | #include "i915_request.h" |
29 | #include "i915_scatterlist.h" |
30 | #include "i915_utils.h" |
31 | #include "i915_vgpu.h" |
32 | |
33 | #include "intel_gtt.h" |
34 | #include "gen8_ppgtt.h" |
35 | #include "intel_engine_pm.h" |
36 | |
37 | static void i915_ggtt_color_adjust(const struct drm_mm_node *node, |
38 | unsigned long color, |
39 | u64 *start, |
40 | u64 *end) |
41 | { |
42 | if (i915_node_color_differs(node, color)) |
43 | *start += I915_GTT_PAGE_SIZE; |
44 | |
45 | /* |
46 | * Also leave a space between the unallocated reserved node after the |
47 | * GTT and any objects within the GTT, i.e. we use the color adjustment |
48 | * to insert a guard page to prevent prefetches crossing over the |
49 | * GTT boundary. |
50 | */ |
51 | node = list_next_entry(node, node_list); |
52 | if (node->color != color) |
53 | *end -= I915_GTT_PAGE_SIZE; |
54 | } |
55 | |
56 | static int ggtt_init_hw(struct i915_ggtt *ggtt) |
57 | { |
58 | struct drm_i915_private *i915 = ggtt->vm.i915; |
59 | |
	i915_address_space_init(&ggtt->vm, VM_CLASS_GGTT);
61 | |
62 | ggtt->vm.is_ggtt = true; |
63 | |
64 | /* Only VLV supports read-only GGTT mappings */ |
65 | ggtt->vm.has_read_only = IS_VALLEYVIEW(i915); |
66 | |
67 | if (!HAS_LLC(i915) && !HAS_PPGTT(i915)) |
68 | ggtt->vm.mm.color_adjust = i915_ggtt_color_adjust; |
69 | |
70 | if (ggtt->mappable_end) { |
		if (!io_mapping_init_wc(&ggtt->iomap,
					ggtt->gmadr.start,
					ggtt->mappable_end)) {
			ggtt->vm.cleanup(&ggtt->vm);
			return -EIO;
		}

		ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start,
					      ggtt->mappable_end);
80 | } |
81 | |
82 | intel_ggtt_init_fences(ggtt); |
83 | |
84 | return 0; |
85 | } |
86 | |
87 | /** |
88 | * i915_ggtt_init_hw - Initialize GGTT hardware |
89 | * @i915: i915 device |
90 | */ |
91 | int i915_ggtt_init_hw(struct drm_i915_private *i915) |
92 | { |
93 | int ret; |
94 | |
95 | /* |
96 | * Note that we use page colouring to enforce a guard page at the |
97 | * end of the address space. This is required as the CS may prefetch |
98 | * beyond the end of the batch buffer, across the page boundary, |
99 | * and beyond the end of the GTT if we do not provide a guard. |
100 | */ |
	ret = ggtt_init_hw(to_gt(i915)->ggtt);
102 | if (ret) |
103 | return ret; |
104 | |
105 | return 0; |
106 | } |
107 | |
108 | /** |
109 | * i915_ggtt_suspend_vm - Suspend the memory mappings for a GGTT or DPT VM |
110 | * @vm: The VM to suspend the mappings for |
111 | * |
112 | * Suspend the memory mappings for all objects mapped to HW via the GGTT or a |
113 | * DPT page table. |
114 | */ |
115 | void i915_ggtt_suspend_vm(struct i915_address_space *vm) |
116 | { |
117 | struct i915_vma *vma, *vn; |
118 | int save_skip_rewrite; |
119 | |
120 | drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); |
121 | |
122 | retry: |
	i915_gem_drain_freed_objects(vm->i915);
124 | |
125 | mutex_lock(&vm->mutex); |
126 | |
127 | /* |
128 | * Skip rewriting PTE on VMA unbind. |
129 | * FIXME: Use an argument to i915_vma_unbind() instead? |
130 | */ |
131 | save_skip_rewrite = vm->skip_pte_rewrite; |
132 | vm->skip_pte_rewrite = true; |
133 | |
134 | list_for_each_entry_safe(vma, vn, &vm->bound_list, vm_link) { |
135 | struct drm_i915_gem_object *obj = vma->obj; |
136 | |
137 | GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); |
138 | |
139 | if (i915_vma_is_pinned(vma) || !i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) |
140 | continue; |
141 | |
142 | /* unlikely to race when GPU is idle, so no worry about slowpath.. */ |
143 | if (WARN_ON(!i915_gem_object_trylock(obj, NULL))) { |
144 | /* |
145 | * No dead objects should appear here, GPU should be |
146 | * completely idle, and userspace suspended |
147 | */ |
148 | i915_gem_object_get(obj); |
149 | |
			mutex_unlock(&vm->mutex);
151 | |
152 | i915_gem_object_lock(obj, NULL); |
153 | GEM_WARN_ON(i915_vma_unbind(vma)); |
154 | i915_gem_object_unlock(obj); |
155 | i915_gem_object_put(obj); |
156 | |
157 | vm->skip_pte_rewrite = save_skip_rewrite; |
158 | goto retry; |
159 | } |
160 | |
		if (i915_vma_is_bound(vma, I915_VMA_GLOBAL_BIND)) {
			i915_vma_wait_for_bind(vma);

			__i915_vma_evict(vma, false);
			drm_mm_remove_node(&vma->node);
166 | } |
167 | |
168 | i915_gem_object_unlock(obj); |
169 | } |
170 | |
171 | vm->clear_range(vm, 0, vm->total); |
172 | |
173 | vm->skip_pte_rewrite = save_skip_rewrite; |
174 | |
	mutex_unlock(&vm->mutex);
176 | } |
177 | |
178 | void i915_ggtt_suspend(struct i915_ggtt *ggtt) |
179 | { |
180 | struct intel_gt *gt; |
181 | |
	i915_ggtt_suspend_vm(&ggtt->vm);
183 | ggtt->invalidate(ggtt); |
184 | |
185 | list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) |
186 | intel_gt_check_and_clear_faults(gt); |
187 | } |
188 | |
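/*
 * Write GFX_FLSH_CNTL_GEN6 under the uncore lock to flush pending GTT
 * writes and trigger a GGTT TLB invalidation; the posting read makes sure
 * the write has taken effect before we return.
 */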
189 | void gen6_ggtt_invalidate(struct i915_ggtt *ggtt) |
190 | { |
191 | struct intel_uncore *uncore = ggtt->vm.gt->uncore; |
192 | |
	spin_lock_irq(&uncore->lock);
	intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	intel_uncore_read_fw(uncore, GFX_FLSH_CNTL_GEN6);
	spin_unlock_irq(&uncore->lock);
197 | } |
198 | |
199 | static bool needs_wc_ggtt_mapping(struct drm_i915_private *i915) |
200 | { |
201 | /* |
202 | * On BXT+/ICL+ writes larger than 64 bit to the GTT pagetable range |
203 | * will be dropped. For WC mappings in general we have 64 byte burst |
204 | * writes when the WC buffer is flushed, so we can't use it, but have to |
205 | * resort to an uncached mapping. The WC issue is easily caught by the |
206 | * readback check when writing GTT PTE entries. |
207 | */ |
208 | if (!IS_GEN9_LP(i915) && GRAPHICS_VER(i915) < 11) |
209 | return true; |
210 | |
211 | return false; |
212 | } |
213 | |
214 | static void gen8_ggtt_invalidate(struct i915_ggtt *ggtt) |
215 | { |
216 | struct intel_uncore *uncore = ggtt->vm.gt->uncore; |
217 | |
218 | /* |
219 | * Note that as an uncached mmio write, this will flush the |
220 | * WCB of the writes into the GGTT before it triggers the invalidate. |
221 | * |
222 | * Only perform this when GGTT is mapped as WC, see ggtt_probe_common(). |
223 | */ |
	if (needs_wc_ggtt_mapping(ggtt->vm.i915))
225 | intel_uncore_write_fw(uncore, GFX_FLSH_CNTL_GEN6, |
226 | GFX_FLSH_CNTL_EN); |
227 | } |
228 | |
229 | static void guc_ggtt_ct_invalidate(struct intel_gt *gt) |
230 | { |
231 | struct intel_uncore *uncore = gt->uncore; |
232 | intel_wakeref_t wakeref; |
233 | |
234 | with_intel_runtime_pm_if_active(uncore->rpm, wakeref) { |
		struct intel_guc *guc = &gt->uc.guc;
236 | |
237 | intel_guc_invalidate_tlb_guc(guc); |
238 | } |
239 | } |
240 | |
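/*
 * Invalidate the GGTT for every GT sharing it. Prefer a GuC CT based TLB
 * invalidation where available, otherwise fall back to poking the GuC TLB
 * invalidation register for the given platform.
 */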
241 | static void guc_ggtt_invalidate(struct i915_ggtt *ggtt) |
242 | { |
243 | struct drm_i915_private *i915 = ggtt->vm.i915; |
244 | struct intel_gt *gt; |
245 | |
246 | gen8_ggtt_invalidate(ggtt); |
247 | |
248 | list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) { |
		if (intel_guc_tlb_invalidation_is_available(&gt->uc.guc))
250 | guc_ggtt_ct_invalidate(gt); |
251 | else if (GRAPHICS_VER(i915) >= 12) |
252 | intel_uncore_write_fw(gt->uncore, |
253 | GEN12_GUC_TLB_INV_CR, |
254 | GEN12_GUC_TLB_INV_CR_INVALIDATE); |
255 | else |
256 | intel_uncore_write_fw(gt->uncore, |
257 | GEN8_GTCR, GEN8_GTCR_INVALIDATE); |
258 | } |
259 | } |
260 | |
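/*
 * On MTL and beyond the caching behaviour of a GGTT PTE is selected via a
 * PAT index rather than a fixed cache-level field: bits 0 and 1 of the
 * pat_index are mirrored into the PTE as MTL_GGTT_PTE_PAT0/PAT1 (so, for
 * example, a pat_index of 3 sets both PAT bits in the encoded PTE).
 */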
261 | static u64 mtl_ggtt_pte_encode(dma_addr_t addr, |
262 | unsigned int pat_index, |
263 | u32 flags) |
264 | { |
265 | gen8_pte_t pte = addr | GEN8_PAGE_PRESENT; |
266 | |
267 | WARN_ON_ONCE(addr & ~GEN12_GGTT_PTE_ADDR_MASK); |
268 | |
269 | if (flags & PTE_LM) |
270 | pte |= GEN12_GGTT_PTE_LM; |
271 | |
272 | if (pat_index & BIT(0)) |
273 | pte |= MTL_GGTT_PTE_PAT0; |
274 | |
275 | if (pat_index & BIT(1)) |
276 | pte |= MTL_GGTT_PTE_PAT1; |
277 | |
278 | return pte; |
279 | } |
280 | |
281 | u64 gen8_ggtt_pte_encode(dma_addr_t addr, |
282 | unsigned int pat_index, |
283 | u32 flags) |
284 | { |
285 | gen8_pte_t pte = addr | GEN8_PAGE_PRESENT; |
286 | |
287 | if (flags & PTE_LM) |
288 | pte |= GEN12_GGTT_PTE_LM; |
289 | |
290 | return pte; |
291 | } |
292 | |
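/*
 * GGTT updates via MI_UPDATE_GTT are only possible once the dedicated bind
 * context on the blitter engine is ready; until then callers fall back to
 * updating the PTEs through the CPU mapping.
 */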
293 | static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt) |
294 | { |
295 | struct intel_gt *gt = ggtt->vm.gt; |
296 | |
297 | return intel_gt_is_bind_context_ready(gt); |
298 | } |
299 | |
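/*
 * Grab the bind context and the power references needed to submit on it.
 * Returns NULL if the GT is wedged or not already awake, in which case the
 * caller is expected to fall back to CPU based GGTT updates.
 */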
300 | static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt, intel_wakeref_t *wakeref) |
301 | { |
302 | struct intel_context *ce; |
303 | struct intel_gt *gt = ggtt->vm.gt; |
304 | |
305 | if (intel_gt_is_wedged(gt)) |
306 | return NULL; |
307 | |
308 | ce = gt->engine[BCS0]->bind_context; |
309 | GEM_BUG_ON(!ce); |
310 | |
311 | /* |
312 | * If the GT is not awake already at this stage then fallback |
313 | * to pci based GGTT update otherwise __intel_wakeref_get_first() |
314 | * would conflict with fs_reclaim trying to allocate memory while |
315 | * doing rpm_resume(). |
316 | */ |
317 | *wakeref = intel_gt_pm_get_if_awake(gt); |
318 | if (!*wakeref) |
319 | return NULL; |
320 | |
	intel_engine_pm_get(ce->engine);
322 | |
323 | return ce; |
324 | } |
325 | |
326 | static void gen8_ggtt_bind_put_ce(struct intel_context *ce, intel_wakeref_t wakeref) |
327 | { |
	intel_engine_pm_put(ce->engine);
	intel_gt_pm_put(ce->engine->gt, wakeref);
330 | } |
331 | |
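/*
 * Update a range of GGTT PTEs by submitting MI_UPDATE_GTT commands on the
 * bind context. Each request updates at most 511 entries; the sg list (if
 * any) supplies the backing addresses and any remainder of the range is
 * filled with the scratch PTE. Returns false if no request could be built,
 * so the caller can fall back to CPU updates.
 */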
332 | static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset, |
333 | struct sg_table *pages, u32 num_entries, |
334 | const gen8_pte_t pte) |
335 | { |
336 | struct i915_sched_attr attr = {}; |
337 | struct intel_gt *gt = ggtt->vm.gt; |
338 | const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode; |
339 | struct sgt_iter iter; |
340 | struct i915_request *rq; |
341 | struct intel_context *ce; |
342 | intel_wakeref_t wakeref; |
343 | u32 *cs; |
344 | |
345 | if (!num_entries) |
346 | return true; |
347 | |
	ce = gen8_ggtt_bind_get_ce(ggtt, &wakeref);
	if (!ce)
		return false;

	if (pages)
		iter = __sgt_iter(pages->sgl, true);
354 | |
355 | while (num_entries) { |
356 | int count = 0; |
357 | dma_addr_t addr; |
		/*
		 * MI_UPDATE_GTT can update 512 entries in a single command,
		 * but doing so ends up with an engine reset; 511 works.
		 */
362 | u32 n_ptes = min_t(u32, 511, num_entries); |
363 | |
364 | if (mutex_lock_interruptible(&ce->timeline->mutex)) |
365 | goto put_ce; |
366 | |
367 | intel_context_enter(ce); |
368 | rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC); |
369 | intel_context_exit(ce); |
		if (IS_ERR(rq)) {
			GT_TRACE(gt, "Failed to get bind request\n");
			mutex_unlock(&ce->timeline->mutex);
			goto put_ce;
		}

		cs = intel_ring_begin(rq, 2 * n_ptes + 2);
		if (IS_ERR(cs)) {
			GT_TRACE(gt, "Failed to get ring space for GGTT bind\n");
			i915_request_set_error_once(rq, PTR_ERR(cs));
380 | /* once a request is created, it must be queued */ |
381 | goto queue_err_rq; |
382 | } |
383 | |
384 | *cs++ = MI_UPDATE_GTT | (2 * n_ptes); |
385 | *cs++ = offset << 12; |
386 | |
387 | if (pages) { |
388 | for_each_sgt_daddr_next(addr, iter) { |
389 | if (count == n_ptes) |
390 | break; |
391 | *cs++ = lower_32_bits(pte | addr); |
392 | *cs++ = upper_32_bits(pte | addr); |
393 | count++; |
394 | } |
395 | /* fill remaining with scratch pte, if any */ |
396 | if (count < n_ptes) { |
				memset64((u64 *)cs, scratch_pte,
					 n_ptes - count);
				cs += (n_ptes - count) * 2;
400 | } |
401 | } else { |
			memset64((u64 *)cs, pte, n_ptes);
403 | cs += n_ptes * 2; |
404 | } |
405 | |
406 | intel_ring_advance(rq, cs); |
407 | queue_err_rq: |
408 | i915_request_get(rq); |
		__i915_request_commit(rq);
		__i915_request_queue(rq, &attr);

		mutex_unlock(&ce->timeline->mutex);
		/* This will break if the request is complete or after engine reset */
		i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
415 | if (rq->fence.error) |
416 | goto err_rq; |
417 | |
418 | i915_request_put(rq); |
419 | |
420 | num_entries -= n_ptes; |
421 | offset += n_ptes; |
422 | } |
423 | |
424 | gen8_ggtt_bind_put_ce(ce, wakeref); |
425 | return true; |
426 | |
427 | err_rq: |
428 | i915_request_put(rq); |
429 | put_ce: |
430 | gen8_ggtt_bind_put_ce(ce, wakeref); |
431 | return false; |
432 | } |
433 | |
434 | static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte) |
435 | { |
	writeq(pte, addr);
437 | } |
438 | |
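/*
 * Write a single PTE directly through the CPU mapping of the GTT (the GSM)
 * and then invalidate the GGTT so the hardware picks up the new entry.
 */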
439 | static void gen8_ggtt_insert_page(struct i915_address_space *vm, |
440 | dma_addr_t addr, |
441 | u64 offset, |
442 | unsigned int pat_index, |
443 | u32 flags) |
444 | { |
445 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
446 | gen8_pte_t __iomem *pte = |
447 | (gen8_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; |
448 | |
	gen8_set_pte(pte, ggtt->vm.pte_encode(addr, pat_index, flags));
450 | |
451 | ggtt->invalidate(ggtt); |
452 | } |
453 | |
454 | static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm, |
455 | dma_addr_t addr, u64 offset, |
456 | unsigned int pat_index, u32 flags) |
457 | { |
458 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
459 | gen8_pte_t pte; |
460 | |
461 | pte = ggtt->vm.pte_encode(addr, pat_index, flags); |
	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
	    gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
464 | return ggtt->invalidate(ggtt); |
465 | |
466 | gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags); |
467 | } |
468 | |
469 | static void gen8_ggtt_insert_entries(struct i915_address_space *vm, |
470 | struct i915_vma_resource *vma_res, |
471 | unsigned int pat_index, |
472 | u32 flags) |
473 | { |
474 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
475 | const gen8_pte_t pte_encode = ggtt->vm.pte_encode(0, pat_index, flags); |
476 | gen8_pte_t __iomem *gte; |
477 | gen8_pte_t __iomem *end; |
478 | struct sgt_iter iter; |
479 | dma_addr_t addr; |
480 | |
481 | /* |
482 | * Note that we ignore PTE_READ_ONLY here. The caller must be careful |
483 | * not to allow the user to override access to a read only page. |
484 | */ |
485 | |
486 | gte = (gen8_pte_t __iomem *)ggtt->gsm; |
487 | gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE; |
488 | end = gte + vma_res->guard / I915_GTT_PAGE_SIZE; |
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;

	for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
		gen8_set_pte(gte++, pte_encode | addr);
	GEM_BUG_ON(gte > end);

	/* Fill the allocated but "unused" space beyond the end of the buffer */
	while (gte < end)
		gen8_set_pte(gte++, vm->scratch[0]->encode);
500 | |
501 | /* |
502 | * We want to flush the TLBs only after we're certain all the PTE |
503 | * updates have finished. |
504 | */ |
505 | ggtt->invalidate(ggtt); |
506 | } |
507 | |
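/*
 * Bind a vma's PTEs with MI_UPDATE_GTT in three stretches: scratch entries
 * for the leading guard pages, the object's pages themselves, and scratch
 * entries again for the trailing guard, mirroring what the CPU path in
 * gen8_ggtt_insert_entries() does.
 */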
508 | static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm, |
509 | struct i915_vma_resource *vma_res, |
510 | unsigned int pat_index, u32 flags) |
511 | { |
512 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
513 | gen8_pte_t scratch_pte = vm->scratch[0]->encode; |
514 | gen8_pte_t pte_encode; |
515 | u64 start, end; |
516 | |
517 | pte_encode = ggtt->vm.pte_encode(0, pat_index, flags); |
518 | start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE; |
519 | end = start + vma_res->guard / I915_GTT_PAGE_SIZE; |
	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
		goto err;

	start = end;
	end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
				 vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
		goto err;

	start += vma_res->node_size / I915_GTT_PAGE_SIZE;
	if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
531 | goto err; |
532 | |
533 | return true; |
534 | |
535 | err: |
536 | return false; |
537 | } |
538 | |
539 | static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm, |
540 | struct i915_vma_resource *vma_res, |
541 | unsigned int pat_index, u32 flags) |
542 | { |
543 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
544 | |
	if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
	    __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
547 | return ggtt->invalidate(ggtt); |
548 | |
549 | gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags); |
550 | } |
551 | |
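/*
 * Point every PTE in the range back at the scratch page. Note that this
 * does not itself invalidate the GGTT; callers that need the update to be
 * visible immediately must take care of that.
 */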
552 | static void gen8_ggtt_clear_range(struct i915_address_space *vm, |
553 | u64 start, u64 length) |
554 | { |
555 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
556 | unsigned int first_entry = start / I915_GTT_PAGE_SIZE; |
557 | unsigned int num_entries = length / I915_GTT_PAGE_SIZE; |
558 | const gen8_pte_t scratch_pte = vm->scratch[0]->encode; |
559 | gen8_pte_t __iomem *gtt_base = |
560 | (gen8_pte_t __iomem *)ggtt->gsm + first_entry; |
561 | const int max_entries = ggtt_total_entries(ggtt) - first_entry; |
562 | int i; |
563 | |
	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
571 | } |
572 | |
573 | static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm, |
574 | u64 start, u64 length) |
575 | { |
576 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
577 | unsigned int first_entry = start / I915_GTT_PAGE_SIZE; |
578 | unsigned int num_entries = length / I915_GTT_PAGE_SIZE; |
579 | const gen8_pte_t scratch_pte = vm->scratch[0]->encode; |
580 | const int max_entries = ggtt_total_entries(ggtt) - first_entry; |
581 | |
	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	if (should_update_ggtt_with_bind(ggtt) &&
	    gen8_ggtt_bind_ptes(ggtt, first_entry, NULL, num_entries, scratch_pte))
		return ggtt->invalidate(ggtt);
590 | |
591 | gen8_ggtt_clear_range(vm, start, length); |
592 | } |
593 | |
594 | static void gen6_ggtt_insert_page(struct i915_address_space *vm, |
595 | dma_addr_t addr, |
596 | u64 offset, |
597 | unsigned int pat_index, |
598 | u32 flags) |
599 | { |
600 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
601 | gen6_pte_t __iomem *pte = |
602 | (gen6_pte_t __iomem *)ggtt->gsm + offset / I915_GTT_PAGE_SIZE; |
603 | |
604 | iowrite32(vm->pte_encode(addr, pat_index, flags), pte); |
605 | |
606 | ggtt->invalidate(ggtt); |
607 | } |
608 | |
609 | /* |
610 | * Binds an object into the global gtt with the specified cache level. |
611 | * The object will be accessible to the GPU via commands whose operands |
612 | * reference offsets within the global GTT as well as accessible by the GPU |
613 | * through the GMADR mapped BAR (i915->mm.gtt->gtt). |
614 | */ |
615 | static void gen6_ggtt_insert_entries(struct i915_address_space *vm, |
616 | struct i915_vma_resource *vma_res, |
617 | unsigned int pat_index, |
618 | u32 flags) |
619 | { |
620 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
621 | gen6_pte_t __iomem *gte; |
622 | gen6_pte_t __iomem *end; |
623 | struct sgt_iter iter; |
624 | dma_addr_t addr; |
625 | |
626 | gte = (gen6_pte_t __iomem *)ggtt->gsm; |
627 | gte += (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE; |
628 | |
629 | end = gte + vma_res->guard / I915_GTT_PAGE_SIZE; |
630 | while (gte < end) |
631 | iowrite32(vm->scratch[0]->encode, gte++); |
632 | end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE; |
633 | for_each_sgt_daddr(addr, iter, vma_res->bi.pages) |
634 | iowrite32(vm->pte_encode(addr, pat_index, flags), gte++); |
635 | GEM_BUG_ON(gte > end); |
636 | |
637 | /* Fill the allocated but "unused" space beyond the end of the buffer */ |
638 | while (gte < end) |
639 | iowrite32(vm->scratch[0]->encode, gte++); |
640 | |
641 | /* |
642 | * We want to flush the TLBs only after we're certain all the PTE |
643 | * updates have finished. |
644 | */ |
645 | ggtt->invalidate(ggtt); |
646 | } |
647 | |
648 | static void nop_clear_range(struct i915_address_space *vm, |
649 | u64 start, u64 length) |
650 | { |
651 | } |
652 | |
653 | static void bxt_vtd_ggtt_wa(struct i915_address_space *vm) |
654 | { |
655 | /* |
656 | * Make sure the internal GAM fifo has been cleared of all GTT |
657 | * writes before exiting stop_machine(). This guarantees that |
658 | * any aperture accesses waiting to start in another process |
659 | * cannot back up behind the GTT writes causing a hang. |
660 | * The register can be any arbitrary GAM register. |
661 | */ |
662 | intel_uncore_posting_read_fw(vm->gt->uncore, GFX_FLSH_CNTL_GEN6); |
663 | } |
664 | |
665 | struct insert_page { |
666 | struct i915_address_space *vm; |
667 | dma_addr_t addr; |
668 | u64 offset; |
669 | unsigned int pat_index; |
670 | }; |
671 | |
672 | static int bxt_vtd_ggtt_insert_page__cb(void *_arg) |
673 | { |
674 | struct insert_page *arg = _arg; |
675 | |
	gen8_ggtt_insert_page(arg->vm, arg->addr, arg->offset,
			      arg->pat_index, 0);
	bxt_vtd_ggtt_wa(arg->vm);
679 | |
680 | return 0; |
681 | } |
682 | |
683 | static void bxt_vtd_ggtt_insert_page__BKL(struct i915_address_space *vm, |
684 | dma_addr_t addr, |
685 | u64 offset, |
686 | unsigned int pat_index, |
687 | u32 unused) |
688 | { |
689 | struct insert_page arg = { vm, addr, offset, pat_index }; |
690 | |
	stop_machine(bxt_vtd_ggtt_insert_page__cb, &arg, NULL);
692 | } |
693 | |
694 | struct insert_entries { |
695 | struct i915_address_space *vm; |
696 | struct i915_vma_resource *vma_res; |
697 | unsigned int pat_index; |
698 | u32 flags; |
699 | }; |
700 | |
701 | static int bxt_vtd_ggtt_insert_entries__cb(void *_arg) |
702 | { |
703 | struct insert_entries *arg = _arg; |
704 | |
	gen8_ggtt_insert_entries(arg->vm, arg->vma_res,
				 arg->pat_index, arg->flags);
	bxt_vtd_ggtt_wa(arg->vm);
708 | |
709 | return 0; |
710 | } |
711 | |
712 | static void bxt_vtd_ggtt_insert_entries__BKL(struct i915_address_space *vm, |
713 | struct i915_vma_resource *vma_res, |
714 | unsigned int pat_index, |
715 | u32 flags) |
716 | { |
717 | struct insert_entries arg = { vm, vma_res, pat_index, flags }; |
718 | |
	stop_machine(bxt_vtd_ggtt_insert_entries__cb, &arg, NULL);
720 | } |
721 | |
722 | static void gen6_ggtt_clear_range(struct i915_address_space *vm, |
723 | u64 start, u64 length) |
724 | { |
725 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
726 | unsigned int first_entry = start / I915_GTT_PAGE_SIZE; |
727 | unsigned int num_entries = length / I915_GTT_PAGE_SIZE; |
728 | gen6_pte_t scratch_pte, __iomem *gtt_base = |
729 | (gen6_pte_t __iomem *)ggtt->gsm + first_entry; |
730 | const int max_entries = ggtt_total_entries(ggtt) - first_entry; |
731 | int i; |
732 | |
733 | if (WARN(num_entries > max_entries, |
734 | "First entry = %d; Num entries = %d (max=%d)\n" , |
735 | first_entry, num_entries, max_entries)) |
736 | num_entries = max_entries; |
737 | |
738 | scratch_pte = vm->scratch[0]->encode; |
739 | for (i = 0; i < num_entries; i++) |
		iowrite32(scratch_pte, &gtt_base[i]);
741 | } |
742 | |
743 | void intel_ggtt_bind_vma(struct i915_address_space *vm, |
744 | struct i915_vm_pt_stash *stash, |
745 | struct i915_vma_resource *vma_res, |
746 | unsigned int pat_index, |
747 | u32 flags) |
748 | { |
749 | u32 pte_flags; |
750 | |
751 | if (vma_res->bound_flags & (~flags & I915_VMA_BIND_MASK)) |
752 | return; |
753 | |
754 | vma_res->bound_flags |= flags; |
755 | |
756 | /* Applicable to VLV (gen8+ do not support RO in the GGTT) */ |
757 | pte_flags = 0; |
758 | if (vma_res->bi.readonly) |
759 | pte_flags |= PTE_READ_ONLY; |
760 | if (vma_res->bi.lmem) |
761 | pte_flags |= PTE_LM; |
762 | |
763 | vm->insert_entries(vm, vma_res, pat_index, pte_flags); |
764 | vma_res->page_sizes_gtt = I915_GTT_PAGE_SIZE; |
765 | } |
766 | |
767 | void intel_ggtt_unbind_vma(struct i915_address_space *vm, |
768 | struct i915_vma_resource *vma_res) |
769 | { |
770 | vm->clear_range(vm, vma_res->start, vma_res->vma_size); |
771 | } |
772 | |
773 | /* |
774 | * Reserve the top of the GuC address space for firmware images. Addresses |
775 | * beyond GUC_GGTT_TOP in the GuC address space are inaccessible by GuC, |
776 | * which makes for a suitable range to hold GuC/HuC firmware images if the |
777 | * size of the GGTT is 4G. However, on a 32-bit platform the size of the GGTT |
778 | * is limited to 2G, which is less than GUC_GGTT_TOP, but we reserve a chunk |
779 | * of the same size anyway, which is far more than needed, to keep the logic |
780 | * in uc_fw_ggtt_offset() simple. |
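 * For example, if GUC_GGTT_TOP sat at 0xFEE00000, the reserved chunk would
 * work out to 18 MiB.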
781 | */ |
782 | #define GUC_TOP_RESERVE_SIZE (SZ_4G - GUC_GGTT_TOP) |
783 | |
784 | static int ggtt_reserve_guc_top(struct i915_ggtt *ggtt) |
785 | { |
786 | u64 offset; |
787 | int ret; |
788 | |
	if (!intel_uc_uses_guc(&ggtt->vm.gt->uc))
790 | return 0; |
791 | |
792 | GEM_BUG_ON(ggtt->vm.total <= GUC_TOP_RESERVE_SIZE); |
793 | offset = ggtt->vm.total - GUC_TOP_RESERVE_SIZE; |
794 | |
	ret = i915_gem_gtt_reserve(&ggtt->vm, NULL, &ggtt->uc_fw,
				   GUC_TOP_RESERVE_SIZE, offset,
				   I915_COLOR_UNEVICTABLE, PIN_NOEVICT);
	if (ret)
		drm_dbg(&ggtt->vm.i915->drm,
			"Failed to reserve top of GGTT for GuC\n");
801 | |
802 | return ret; |
803 | } |
804 | |
805 | static void ggtt_release_guc_top(struct i915_ggtt *ggtt) |
806 | { |
	if (drm_mm_node_allocated(&ggtt->uc_fw))
		drm_mm_remove_node(&ggtt->uc_fw);
809 | } |
810 | |
811 | static void cleanup_init_ggtt(struct i915_ggtt *ggtt) |
812 | { |
813 | ggtt_release_guc_top(ggtt); |
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);
817 | } |
818 | |
819 | static int init_ggtt(struct i915_ggtt *ggtt) |
820 | { |
821 | /* |
822 | * Let GEM Manage all of the aperture. |
823 | * |
824 | * However, leave one page at the end still bound to the scratch page. |
825 | * There are a number of places where the hardware apparently prefetches |
826 | * past the end of the object, and we've seen multiple hangs with the |
827 | * GPU head pointer stuck in a batchbuffer bound at the last page of the |
828 | * aperture. One page should be enough to keep any prefetching inside |
829 | * of the aperture. |
830 | */ |
831 | unsigned long hole_start, hole_end; |
832 | struct drm_mm_node *entry; |
833 | int ret; |
834 | |
835 | /* |
836 | * GuC requires all resources that we're sharing with it to be placed in |
837 | * non-WOPCM memory. If GuC is not present or not in use we still need a |
838 | * small bias as ring wraparound at offset 0 sometimes hangs. No idea |
839 | * why. |
840 | */ |
841 | ggtt->pin_bias = max_t(u32, I915_GTT_PAGE_SIZE, |
842 | intel_wopcm_guc_size(&ggtt->vm.gt->wopcm)); |
843 | |
844 | ret = intel_vgt_balloon(ggtt); |
845 | if (ret) |
846 | return ret; |
847 | |
848 | mutex_init(&ggtt->error_mutex); |
849 | if (ggtt->mappable_end) { |
850 | /* |
851 | * Reserve a mappable slot for our lockless error capture. |
852 | * |
853 | * We strongly prefer taking address 0x0 in order to protect |
854 | * other critical buffers against accidental overwrites, |
855 | * as writing to address 0 is a very common mistake. |
856 | * |
857 | * Since 0 may already be in use by the system (e.g. the BIOS |
858 | * framebuffer), we let the reservation fail quietly and hope |
859 | * 0 remains reserved always. |
860 | * |
861 | * If we fail to reserve 0, and then fail to find any space |
862 | * for an error-capture, remain silent. We can afford not |
863 | * to reserve an error_capture node as we have fallback |
864 | * paths, and we trust that 0 will remain reserved. However, |
865 | * the only likely reason for failure to insert is a driver |
866 | * bug, which we expect to cause other failures... |
867 | * |
868 | * Since CPU can perform speculative reads on error capture |
869 | * (write-combining allows it) add scratch page after error |
870 | * capture to avoid DMAR errors. |
871 | */ |
872 | ggtt->error_capture.size = 2 * I915_GTT_PAGE_SIZE; |
873 | ggtt->error_capture.color = I915_COLOR_UNEVICTABLE; |
		if (drm_mm_reserve_node(&ggtt->vm.mm, &ggtt->error_capture))
			drm_mm_insert_node_in_range(&ggtt->vm.mm,
						    &ggtt->error_capture,
						    ggtt->error_capture.size, 0,
						    ggtt->error_capture.color,
						    0, ggtt->mappable_end,
						    DRM_MM_INSERT_LOW);
	}
	if (drm_mm_node_allocated(&ggtt->error_capture)) {
883 | u64 start = ggtt->error_capture.start; |
884 | u64 size = ggtt->error_capture.size; |
885 | |
886 | ggtt->vm.scratch_range(&ggtt->vm, start, size); |
887 | drm_dbg(&ggtt->vm.i915->drm, |
888 | "Reserved GGTT:[%llx, %llx] for use by error capture\n" , |
889 | start, start + size); |
890 | } |
891 | |
892 | /* |
893 | * The upper portion of the GuC address space has a sizeable hole |
894 | * (several MB) that is inaccessible by GuC. Reserve this range within |
895 | * GGTT as it can comfortably hold GuC/HuC firmware images. |
896 | */ |
897 | ret = ggtt_reserve_guc_top(ggtt); |
898 | if (ret) |
899 | goto err; |
900 | |
901 | /* Clear any non-preallocated blocks */ |
902 | drm_mm_for_each_hole(entry, &ggtt->vm.mm, hole_start, hole_end) { |
903 | drm_dbg(&ggtt->vm.i915->drm, |
904 | "clearing unused GTT space: [%lx, %lx]\n" , |
905 | hole_start, hole_end); |
906 | ggtt->vm.clear_range(&ggtt->vm, hole_start, |
907 | hole_end - hole_start); |
908 | } |
909 | |
910 | /* And finally clear the reserved guard page */ |
911 | ggtt->vm.clear_range(&ggtt->vm, ggtt->vm.total - PAGE_SIZE, PAGE_SIZE); |
912 | |
913 | return 0; |
914 | |
915 | err: |
916 | cleanup_init_ggtt(ggtt); |
917 | return ret; |
918 | } |
919 | |
920 | static void aliasing_gtt_bind_vma(struct i915_address_space *vm, |
921 | struct i915_vm_pt_stash *stash, |
922 | struct i915_vma_resource *vma_res, |
923 | unsigned int pat_index, |
924 | u32 flags) |
925 | { |
926 | u32 pte_flags; |
927 | |
928 | /* Currently applicable only to VLV */ |
929 | pte_flags = 0; |
930 | if (vma_res->bi.readonly) |
931 | pte_flags |= PTE_READ_ONLY; |
932 | |
933 | if (flags & I915_VMA_LOCAL_BIND) |
		ppgtt_bind_vma(&i915_vm_to_ggtt(vm)->alias->vm,
			       stash, vma_res, pat_index, flags);
936 | |
937 | if (flags & I915_VMA_GLOBAL_BIND) |
938 | vm->insert_entries(vm, vma_res, pat_index, pte_flags); |
939 | |
940 | vma_res->bound_flags |= flags; |
941 | } |
942 | |
943 | static void aliasing_gtt_unbind_vma(struct i915_address_space *vm, |
944 | struct i915_vma_resource *vma_res) |
945 | { |
946 | if (vma_res->bound_flags & I915_VMA_GLOBAL_BIND) |
947 | vm->clear_range(vm, vma_res->start, vma_res->vma_size); |
948 | |
949 | if (vma_res->bound_flags & I915_VMA_LOCAL_BIND) |
		ppgtt_unbind_vma(&i915_vm_to_ggtt(vm)->alias->vm, vma_res);
951 | } |
952 | |
953 | static int init_aliasing_ppgtt(struct i915_ggtt *ggtt) |
954 | { |
955 | struct i915_vm_pt_stash stash = {}; |
956 | struct i915_ppgtt *ppgtt; |
957 | int err; |
958 | |
	ppgtt = i915_ppgtt_create(ggtt->vm.gt, 0);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);
962 | |
963 | if (GEM_WARN_ON(ppgtt->vm.total < ggtt->vm.total)) { |
964 | err = -ENODEV; |
965 | goto err_ppgtt; |
966 | } |
967 | |
	err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, ggtt->vm.total);
	if (err)
		goto err_ppgtt;

	i915_gem_object_lock(ppgtt->vm.scratch[0], NULL);
	err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
	i915_gem_object_unlock(ppgtt->vm.scratch[0]);
975 | if (err) |
976 | goto err_stash; |
977 | |
978 | /* |
979 | * Note we only pre-allocate as far as the end of the global |
980 | * GTT. On 48b / 4-level page-tables, the difference is very, |
981 | * very significant! We have to preallocate as GVT/vgpu does |
982 | * not like the page directory disappearing. |
983 | */ |
984 | ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, ggtt->vm.total); |
985 | |
986 | ggtt->alias = ppgtt; |
987 | ggtt->vm.bind_async_flags |= ppgtt->vm.bind_async_flags; |
988 | |
989 | GEM_BUG_ON(ggtt->vm.vma_ops.bind_vma != intel_ggtt_bind_vma); |
990 | ggtt->vm.vma_ops.bind_vma = aliasing_gtt_bind_vma; |
991 | |
992 | GEM_BUG_ON(ggtt->vm.vma_ops.unbind_vma != intel_ggtt_unbind_vma); |
993 | ggtt->vm.vma_ops.unbind_vma = aliasing_gtt_unbind_vma; |
994 | |
	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
996 | return 0; |
997 | |
998 | err_stash: |
	i915_vm_free_pt_stash(&ppgtt->vm, &stash);
err_ppgtt:
	i915_vm_put(&ppgtt->vm);
1002 | return err; |
1003 | } |
1004 | |
1005 | static void fini_aliasing_ppgtt(struct i915_ggtt *ggtt) |
1006 | { |
1007 | struct i915_ppgtt *ppgtt; |
1008 | |
1009 | ppgtt = fetch_and_zero(&ggtt->alias); |
1010 | if (!ppgtt) |
1011 | return; |
1012 | |
	i915_vm_put(&ppgtt->vm);
1014 | |
1015 | ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; |
1016 | ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; |
1017 | } |
1018 | |
1019 | int i915_init_ggtt(struct drm_i915_private *i915) |
1020 | { |
1021 | int ret; |
1022 | |
	ret = init_ggtt(to_gt(i915)->ggtt);
1024 | if (ret) |
1025 | return ret; |
1026 | |
1027 | if (INTEL_PPGTT(i915) == INTEL_PPGTT_ALIASING) { |
		ret = init_aliasing_ppgtt(to_gt(i915)->ggtt);
		if (ret)
			cleanup_init_ggtt(to_gt(i915)->ggtt);
1031 | } |
1032 | |
1033 | return 0; |
1034 | } |
1035 | |
1036 | static void ggtt_cleanup_hw(struct i915_ggtt *ggtt) |
1037 | { |
1038 | struct i915_vma *vma, *vn; |
1039 | |
1040 | flush_workqueue(ggtt->vm.i915->wq); |
	i915_gem_drain_freed_objects(ggtt->vm.i915);
1042 | |
1043 | mutex_lock(&ggtt->vm.mutex); |
1044 | |
1045 | ggtt->vm.skip_pte_rewrite = true; |
1046 | |
1047 | list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { |
1048 | struct drm_i915_gem_object *obj = vma->obj; |
1049 | bool trylock; |
1050 | |
1051 | trylock = i915_gem_object_trylock(obj, NULL); |
1052 | WARN_ON(!trylock); |
1053 | |
1054 | WARN_ON(__i915_vma_unbind(vma)); |
1055 | if (trylock) |
1056 | i915_gem_object_unlock(obj); |
1057 | } |
1058 | |
	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);
	mutex_destroy(&ggtt->error_mutex);
1062 | |
1063 | ggtt_release_guc_top(ggtt); |
1064 | intel_vgt_deballoon(ggtt); |
1065 | |
1066 | ggtt->vm.cleanup(&ggtt->vm); |
1067 | |
	mutex_unlock(&ggtt->vm.mutex);
	i915_address_space_fini(&ggtt->vm);

	arch_phys_wc_del(ggtt->mtrr);

	if (ggtt->iomap.size)
		io_mapping_fini(&ggtt->iomap);
1075 | } |
1076 | |
1077 | /** |
1078 | * i915_ggtt_driver_release - Clean up GGTT hardware initialization |
1079 | * @i915: i915 device |
1080 | */ |
1081 | void i915_ggtt_driver_release(struct drm_i915_private *i915) |
1082 | { |
1083 | struct i915_ggtt *ggtt = to_gt(i915)->ggtt; |
1084 | |
1085 | fini_aliasing_ppgtt(ggtt); |
1086 | |
1087 | intel_ggtt_fini_fences(ggtt); |
1088 | ggtt_cleanup_hw(ggtt); |
1089 | } |
1090 | |
1091 | /** |
1092 | * i915_ggtt_driver_late_release - Cleanup of GGTT that needs to be done after |
1093 | * all free objects have been drained. |
1094 | * @i915: i915 device |
1095 | */ |
1096 | void i915_ggtt_driver_late_release(struct drm_i915_private *i915) |
1097 | { |
1098 | struct i915_ggtt *ggtt = to_gt(i915)->ggtt; |
1099 | |
1100 | GEM_WARN_ON(kref_read(&ggtt->vm.resv_ref) != 1); |
	dma_resv_fini(&ggtt->vm._resv);
1102 | } |
1103 | |
1104 | static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) |
1105 | { |
1106 | snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT; |
1107 | snb_gmch_ctl &= SNB_GMCH_GGMS_MASK; |
1108 | return snb_gmch_ctl << 20; |
1109 | } |
1110 | |
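/*
 * On gen8+ the GGMS field encodes the GTT size as a power of two in MB: a
 * raw value of n selects a (1 << n) MB GTT. For example, n == 3 gives an
 * 8 MB GTT, i.e. 1M eight-byte PTEs, mapping a 4 GB GGTT address space.
 */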
1111 | static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl) |
1112 | { |
1113 | bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT; |
1114 | bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK; |
1115 | if (bdw_gmch_ctl) |
1116 | bdw_gmch_ctl = 1 << bdw_gmch_ctl; |
1117 | |
1118 | #ifdef CONFIG_X86_32 |
1119 | /* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * I915_GTT_PAGE_SIZE */ |
1120 | if (bdw_gmch_ctl > 4) |
1121 | bdw_gmch_ctl = 4; |
1122 | #endif |
1123 | |
1124 | return bdw_gmch_ctl << 20; |
1125 | } |
1126 | |
1127 | static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) |
1128 | { |
1129 | gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT; |
1130 | gmch_ctrl &= SNB_GMCH_GGMS_MASK; |
1131 | |
1132 | if (gmch_ctrl) |
1133 | return 1 << (20 + gmch_ctrl); |
1134 | |
1135 | return 0; |
1136 | } |
1137 | |
1138 | static unsigned int gen6_gttmmadr_size(struct drm_i915_private *i915) |
1139 | { |
1140 | /* |
1141 | * GEN6: GTTMMADR size is 4MB and GTTADR starts at 2MB offset |
1142 | * GEN8: GTTMMADR size is 16MB and GTTADR starts at 8MB offset |
1143 | */ |
1144 | GEM_BUG_ON(GRAPHICS_VER(i915) < 6); |
1145 | return (GRAPHICS_VER(i915) < 8) ? SZ_4M : SZ_16M; |
1146 | } |
1147 | |
1148 | static unsigned int gen6_gttadr_offset(struct drm_i915_private *i915) |
1149 | { |
1150 | return gen6_gttmmadr_size(i915) / 2; |
1151 | } |
1152 | |
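/*
 * Map the GSM (the portion of the GTT page table exposed through the
 * GTTMMADR BAR, or via GSMBASE with direct stolen access) so we can update
 * PTEs from the CPU, and set up the scratch page that unused entries point
 * at. Whether the mapping is WC or uncached depends on
 * needs_wc_ggtt_mapping().
 */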
1153 | static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) |
1154 | { |
1155 | struct drm_i915_private *i915 = ggtt->vm.i915; |
1156 | struct intel_uncore *uncore = ggtt->vm.gt->uncore; |
1157 | struct pci_dev *pdev = to_pci_dev(i915->drm.dev); |
1158 | phys_addr_t phys_addr; |
1159 | u32 pte_flags; |
1160 | int ret; |
1161 | |
1162 | GEM_WARN_ON(pci_resource_len(pdev, GEN4_GTTMMADR_BAR) != gen6_gttmmadr_size(i915)); |
1163 | |
1164 | if (i915_direct_stolen_access(i915)) { |
1165 | drm_dbg(&i915->drm, "Using direct GSM access\n" ); |
1166 | phys_addr = intel_uncore_read64(uncore, GEN6_GSMBASE) & GEN11_BDSM_MASK; |
1167 | } else { |
1168 | phys_addr = pci_resource_start(pdev, GEN4_GTTMMADR_BAR) + gen6_gttadr_offset(i915); |
1169 | } |
1170 | |
1171 | if (needs_wc_ggtt_mapping(i915)) |
		ggtt->gsm = ioremap_wc(phys_addr, size);
	else
		ggtt->gsm = ioremap(phys_addr, size);
1175 | |
	if (!ggtt->gsm) {
		drm_err(&i915->drm, "Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	kref_init(&ggtt->vm.resv_ref);
	ret = setup_scratch_page(&ggtt->vm);
	if (ret) {
		drm_err(&i915->drm, "Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(ggtt->gsm);
1187 | return ret; |
1188 | } |
1189 | |
1190 | pte_flags = 0; |
	if (i915_gem_object_is_lmem(ggtt->vm.scratch[0]))
		pte_flags |= PTE_LM;

	ggtt->vm.scratch[0]->encode =
		ggtt->vm.pte_encode(px_dma(ggtt->vm.scratch[0]),
				    i915_gem_get_pat_index(i915,
							   I915_CACHE_NONE),
				    pte_flags);
1199 | |
1200 | return 0; |
1201 | } |
1202 | |
1203 | static void gen6_gmch_remove(struct i915_address_space *vm) |
1204 | { |
1205 | struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm); |
1206 | |
	iounmap(ggtt->gsm);
1208 | free_scratch(vm); |
1209 | } |
1210 | |
1211 | static struct resource pci_resource(struct pci_dev *pdev, int bar) |
1212 | { |
1213 | return DEFINE_RES_MEM(pci_resource_start(pdev, bar), |
1214 | pci_resource_len(pdev, bar)); |
1215 | } |
1216 | |
1217 | static int gen8_gmch_probe(struct i915_ggtt *ggtt) |
1218 | { |
1219 | struct drm_i915_private *i915 = ggtt->vm.i915; |
1220 | struct pci_dev *pdev = to_pci_dev(i915->drm.dev); |
1221 | unsigned int size; |
1222 | u16 snb_gmch_ctl; |
1223 | |
1224 | if (!HAS_LMEM(i915) && !HAS_LMEMBAR_SMEM_STOLEN(i915)) { |
1225 | if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR)) |
1226 | return -ENXIO; |
1227 | |
1228 | ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR); |
		ggtt->mappable_end = resource_size(&ggtt->gmadr);
1230 | } |
1231 | |
	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
	if (IS_CHERRYVIEW(i915))
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	else
		size = gen8_get_total_gtt_size(snb_gmch_ctl);
1237 | |
1238 | ggtt->vm.alloc_pt_dma = alloc_pt_dma; |
1239 | ggtt->vm.alloc_scratch_dma = alloc_pt_dma; |
1240 | ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY; |
1241 | |
1242 | ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE; |
1243 | ggtt->vm.cleanup = gen6_gmch_remove; |
1244 | ggtt->vm.insert_page = gen8_ggtt_insert_page; |
1245 | ggtt->vm.clear_range = nop_clear_range; |
1246 | ggtt->vm.scratch_range = gen8_ggtt_clear_range; |
1247 | |
1248 | ggtt->vm.insert_entries = gen8_ggtt_insert_entries; |
1249 | |
1250 | /* |
1251 | * Serialize GTT updates with aperture access on BXT if VT-d is on, |
1252 | * and always on CHV. |
1253 | */ |
1254 | if (intel_vm_no_concurrent_access_wa(i915)) { |
1255 | ggtt->vm.insert_entries = bxt_vtd_ggtt_insert_entries__BKL; |
1256 | ggtt->vm.insert_page = bxt_vtd_ggtt_insert_page__BKL; |
1257 | |
1258 | /* |
1259 | * Calling stop_machine() version of GGTT update function |
1260 | * at error capture/reset path will raise lockdep warning. |
1261 | * Allow calling gen8_ggtt_insert_* directly at reset path |
1262 | * which is safe from parallel GGTT updates. |
1263 | */ |
1264 | ggtt->vm.raw_insert_page = gen8_ggtt_insert_page; |
1265 | ggtt->vm.raw_insert_entries = gen8_ggtt_insert_entries; |
1266 | |
1267 | ggtt->vm.bind_async_flags = |
1268 | I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND; |
1269 | } |
1270 | |
1271 | if (i915_ggtt_require_binder(i915)) { |
1272 | ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind; |
1273 | ggtt->vm.insert_page = gen8_ggtt_insert_page_bind; |
1274 | ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind; |
		/*
		 * If the GPU is hung, we might need to bind VMAs for error
		 * capture. Fall back to CPU GGTT updates in that case.
		 */
1279 | ggtt->vm.raw_insert_page = gen8_ggtt_insert_page; |
1280 | } |
1281 | |
	if (intel_uc_wants_guc_submission(&ggtt->vm.gt->uc))
1283 | ggtt->invalidate = guc_ggtt_invalidate; |
1284 | else |
1285 | ggtt->invalidate = gen8_ggtt_invalidate; |
1286 | |
1287 | ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; |
1288 | ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; |
1289 | |
1290 | if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 70)) |
1291 | ggtt->vm.pte_encode = mtl_ggtt_pte_encode; |
1292 | else |
1293 | ggtt->vm.pte_encode = gen8_ggtt_pte_encode; |
1294 | |
1295 | return ggtt_probe_common(ggtt, size); |
1296 | } |
1297 | |
1298 | /* |
1299 | * For pre-gen8 platforms pat_index is the same as enum i915_cache_level, |
1300 | * so the switch-case statements in these PTE encode functions are still valid. |
1301 | * See translation table LEGACY_CACHELEVEL. |
1302 | */ |
1303 | static u64 snb_pte_encode(dma_addr_t addr, |
1304 | unsigned int pat_index, |
1305 | u32 flags) |
1306 | { |
1307 | gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; |
1308 | |
1309 | switch (pat_index) { |
1310 | case I915_CACHE_L3_LLC: |
1311 | case I915_CACHE_LLC: |
1312 | pte |= GEN6_PTE_CACHE_LLC; |
1313 | break; |
1314 | case I915_CACHE_NONE: |
1315 | pte |= GEN6_PTE_UNCACHED; |
1316 | break; |
1317 | default: |
1318 | MISSING_CASE(pat_index); |
1319 | } |
1320 | |
1321 | return pte; |
1322 | } |
1323 | |
1324 | static u64 ivb_pte_encode(dma_addr_t addr, |
1325 | unsigned int pat_index, |
1326 | u32 flags) |
1327 | { |
1328 | gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; |
1329 | |
1330 | switch (pat_index) { |
1331 | case I915_CACHE_L3_LLC: |
1332 | pte |= GEN7_PTE_CACHE_L3_LLC; |
1333 | break; |
1334 | case I915_CACHE_LLC: |
1335 | pte |= GEN6_PTE_CACHE_LLC; |
1336 | break; |
1337 | case I915_CACHE_NONE: |
1338 | pte |= GEN6_PTE_UNCACHED; |
1339 | break; |
1340 | default: |
1341 | MISSING_CASE(pat_index); |
1342 | } |
1343 | |
1344 | return pte; |
1345 | } |
1346 | |
1347 | static u64 byt_pte_encode(dma_addr_t addr, |
1348 | unsigned int pat_index, |
1349 | u32 flags) |
1350 | { |
1351 | gen6_pte_t pte = GEN6_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; |
1352 | |
1353 | if (!(flags & PTE_READ_ONLY)) |
1354 | pte |= BYT_PTE_WRITEABLE; |
1355 | |
1356 | if (pat_index != I915_CACHE_NONE) |
1357 | pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES; |
1358 | |
1359 | return pte; |
1360 | } |
1361 | |
1362 | static u64 hsw_pte_encode(dma_addr_t addr, |
1363 | unsigned int pat_index, |
1364 | u32 flags) |
1365 | { |
1366 | gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; |
1367 | |
1368 | if (pat_index != I915_CACHE_NONE) |
1369 | pte |= HSW_WB_LLC_AGE3; |
1370 | |
1371 | return pte; |
1372 | } |
1373 | |
1374 | static u64 iris_pte_encode(dma_addr_t addr, |
1375 | unsigned int pat_index, |
1376 | u32 flags) |
1377 | { |
1378 | gen6_pte_t pte = HSW_PTE_ADDR_ENCODE(addr) | GEN6_PTE_VALID; |
1379 | |
1380 | switch (pat_index) { |
1381 | case I915_CACHE_NONE: |
1382 | break; |
1383 | case I915_CACHE_WT: |
1384 | pte |= HSW_WT_ELLC_LLC_AGE3; |
1385 | break; |
1386 | default: |
1387 | pte |= HSW_WB_ELLC_LLC_AGE3; |
1388 | break; |
1389 | } |
1390 | |
1391 | return pte; |
1392 | } |
1393 | |
1394 | static int gen6_gmch_probe(struct i915_ggtt *ggtt) |
1395 | { |
1396 | struct drm_i915_private *i915 = ggtt->vm.i915; |
1397 | struct pci_dev *pdev = to_pci_dev(i915->drm.dev); |
1398 | unsigned int size; |
1399 | u16 snb_gmch_ctl; |
1400 | |
1401 | if (!i915_pci_resource_valid(pdev, GEN4_GMADR_BAR)) |
1402 | return -ENXIO; |
1403 | |
1404 | ggtt->gmadr = pci_resource(pdev, GEN4_GMADR_BAR); |
	ggtt->mappable_end = resource_size(&ggtt->gmadr);
1406 | |
1407 | /* |
1408 | * 64/512MB is the current min/max we actually know of, but this is |
1409 | * just a coarse sanity check. |
1410 | */ |
1411 | if (ggtt->mappable_end < (64 << 20) || |
1412 | ggtt->mappable_end > (512 << 20)) { |
1413 | drm_err(&i915->drm, "Unknown GMADR size (%pa)\n" , |
1414 | &ggtt->mappable_end); |
1415 | return -ENXIO; |
1416 | } |
1417 | |
	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);
1419 | |
1420 | size = gen6_get_total_gtt_size(snb_gmch_ctl); |
1421 | ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE; |
1422 | |
1423 | ggtt->vm.alloc_pt_dma = alloc_pt_dma; |
1424 | ggtt->vm.alloc_scratch_dma = alloc_pt_dma; |
1425 | |
1426 | ggtt->vm.clear_range = nop_clear_range; |
1427 | if (!HAS_FULL_PPGTT(i915)) |
1428 | ggtt->vm.clear_range = gen6_ggtt_clear_range; |
1429 | ggtt->vm.scratch_range = gen6_ggtt_clear_range; |
1430 | ggtt->vm.insert_page = gen6_ggtt_insert_page; |
1431 | ggtt->vm.insert_entries = gen6_ggtt_insert_entries; |
1432 | ggtt->vm.cleanup = gen6_gmch_remove; |
1433 | |
1434 | ggtt->invalidate = gen6_ggtt_invalidate; |
1435 | |
1436 | if (HAS_EDRAM(i915)) |
1437 | ggtt->vm.pte_encode = iris_pte_encode; |
1438 | else if (IS_HASWELL(i915)) |
1439 | ggtt->vm.pte_encode = hsw_pte_encode; |
1440 | else if (IS_VALLEYVIEW(i915)) |
1441 | ggtt->vm.pte_encode = byt_pte_encode; |
1442 | else if (GRAPHICS_VER(i915) >= 7) |
1443 | ggtt->vm.pte_encode = ivb_pte_encode; |
1444 | else |
1445 | ggtt->vm.pte_encode = snb_pte_encode; |
1446 | |
1447 | ggtt->vm.vma_ops.bind_vma = intel_ggtt_bind_vma; |
1448 | ggtt->vm.vma_ops.unbind_vma = intel_ggtt_unbind_vma; |
1449 | |
1450 | return ggtt_probe_common(ggtt, size); |
1451 | } |
1452 | |
1453 | static int ggtt_probe_hw(struct i915_ggtt *ggtt, struct intel_gt *gt) |
1454 | { |
1455 | struct drm_i915_private *i915 = gt->i915; |
1456 | int ret; |
1457 | |
1458 | ggtt->vm.gt = gt; |
1459 | ggtt->vm.i915 = i915; |
1460 | ggtt->vm.dma = i915->drm.dev; |
	dma_resv_init(&ggtt->vm._resv);
1462 | |
1463 | if (GRAPHICS_VER(i915) >= 8) |
1464 | ret = gen8_gmch_probe(ggtt); |
1465 | else if (GRAPHICS_VER(i915) >= 6) |
1466 | ret = gen6_gmch_probe(ggtt); |
1467 | else |
1468 | ret = intel_ggtt_gmch_probe(ggtt); |
1469 | |
1470 | if (ret) { |
		dma_resv_fini(&ggtt->vm._resv);
1472 | return ret; |
1473 | } |
1474 | |
1475 | if ((ggtt->vm.total - 1) >> 32) { |
1476 | drm_err(&i915->drm, |
1477 | "We never expected a Global GTT with more than 32bits" |
1478 | " of address space! Found %lldM!\n" , |
1479 | ggtt->vm.total >> 20); |
1480 | ggtt->vm.total = 1ULL << 32; |
1481 | ggtt->mappable_end = |
1482 | min_t(u64, ggtt->mappable_end, ggtt->vm.total); |
1483 | } |
1484 | |
1485 | if (ggtt->mappable_end > ggtt->vm.total) { |
1486 | drm_err(&i915->drm, |
1487 | "mappable aperture extends past end of GGTT," |
1488 | " aperture=%pa, total=%llx\n" , |
1489 | &ggtt->mappable_end, ggtt->vm.total); |
1490 | ggtt->mappable_end = ggtt->vm.total; |
1491 | } |
1492 | |
1493 | /* GMADR is the PCI mmio aperture into the global GTT. */ |
1494 | drm_dbg(&i915->drm, "GGTT size = %lluM\n" , ggtt->vm.total >> 20); |
1495 | drm_dbg(&i915->drm, "GMADR size = %lluM\n" , |
1496 | (u64)ggtt->mappable_end >> 20); |
1497 | drm_dbg(&i915->drm, "DSM size = %lluM\n" , |
1498 | (u64)resource_size(&intel_graphics_stolen_res) >> 20); |
1499 | |
1500 | return 0; |
1501 | } |
1502 | |
1503 | /** |
1504 | * i915_ggtt_probe_hw - Probe GGTT hardware location |
1505 | * @i915: i915 device |
1506 | */ |
1507 | int i915_ggtt_probe_hw(struct drm_i915_private *i915) |
1508 | { |
1509 | struct intel_gt *gt; |
1510 | int ret, i; |
1511 | |
1512 | for_each_gt(gt, i915, i) { |
1513 | ret = intel_gt_assign_ggtt(gt); |
1514 | if (ret) |
1515 | return ret; |
1516 | } |
1517 | |
	ret = ggtt_probe_hw(to_gt(i915)->ggtt, to_gt(i915));
1519 | if (ret) |
1520 | return ret; |
1521 | |
1522 | if (i915_vtd_active(i915)) |
1523 | drm_info(&i915->drm, "VT-d active for gfx access\n" ); |
1524 | |
1525 | return 0; |
1526 | } |
1527 | |
1528 | struct i915_ggtt *i915_ggtt_create(struct drm_i915_private *i915) |
1529 | { |
1530 | struct i915_ggtt *ggtt; |
1531 | |
	ggtt = drmm_kzalloc(&i915->drm, sizeof(*ggtt), GFP_KERNEL);
	if (!ggtt)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&ggtt->gt_list);
1537 | |
1538 | return ggtt; |
1539 | } |
1540 | |
1541 | int i915_ggtt_enable_hw(struct drm_i915_private *i915) |
1542 | { |
1543 | if (GRAPHICS_VER(i915) < 6) |
1544 | return intel_ggtt_gmch_enable_hw(i915); |
1545 | |
1546 | return 0; |
1547 | } |
1548 | |
1549 | /** |
1550 | * i915_ggtt_resume_vm - Restore the memory mappings for a GGTT or DPT VM |
1551 | * @vm: The VM to restore the mappings for |
1552 | * |
1553 | * Restore the memory mappings for all objects mapped to HW via the GGTT or a |
1554 | * DPT page table. |
1555 | * |
1556 | * Returns %true if restoring the mapping for any object that was in a write |
1557 | * domain before suspend. |
1558 | */ |
1559 | bool i915_ggtt_resume_vm(struct i915_address_space *vm) |
1560 | { |
1561 | struct i915_vma *vma; |
1562 | bool write_domain_objs = false; |
1563 | |
1564 | drm_WARN_ON(&vm->i915->drm, !vm->is_ggtt && !vm->is_dpt); |
1565 | |
1566 | /* First fill our portion of the GTT with scratch pages */ |
1567 | vm->clear_range(vm, 0, vm->total); |
1568 | |
1569 | /* clflush objects bound into the GGTT and rebind them. */ |
1570 | list_for_each_entry(vma, &vm->bound_list, vm_link) { |
1571 | struct drm_i915_gem_object *obj = vma->obj; |
1572 | unsigned int was_bound = |
			atomic_read(&vma->flags) & I915_VMA_BIND_MASK;
1574 | |
1575 | GEM_BUG_ON(!was_bound); |
1576 | |
1577 | /* |
1578 | * Clear the bound flags of the vma resource to allow |
1579 | * ptes to be repopulated. |
1580 | */ |
1581 | vma->resource->bound_flags = 0; |
		vma->ops->bind_vma(vm, NULL, vma->resource,
				   obj ? obj->pat_index :
					 i915_gem_get_pat_index(vm->i915,
								I915_CACHE_NONE),
				   was_bound);
1587 | |
1588 | if (obj) { /* only used during resume => exclusive access */ |
1589 | write_domain_objs |= fetch_and_zero(&obj->write_domain); |
1590 | obj->read_domains |= I915_GEM_DOMAIN_GTT; |
1591 | } |
1592 | } |
1593 | |
1594 | return write_domain_objs; |
1595 | } |
1596 | |
1597 | void i915_ggtt_resume(struct i915_ggtt *ggtt) |
1598 | { |
1599 | struct intel_gt *gt; |
1600 | bool flush; |
1601 | |
1602 | list_for_each_entry(gt, &ggtt->gt_list, ggtt_link) |
1603 | intel_gt_check_and_clear_faults(gt); |
1604 | |
	flush = i915_ggtt_resume_vm(&ggtt->vm);

	if (drm_mm_node_allocated(&ggtt->error_capture))
		ggtt->vm.scratch_range(&ggtt->vm, ggtt->error_capture.start,
				       ggtt->error_capture.size);

	list_for_each_entry(gt, &ggtt->gt_list, ggtt_link)
		intel_uc_resume_mappings(&gt->uc);
1613 | |
1614 | ggtt->invalidate(ggtt); |
1615 | |
1616 | if (flush) |
1617 | wbinvd_on_all_cpus(); |
1618 | |
1619 | intel_ggtt_restore_fences(ggtt); |
1620 | } |
1621 | |