1 | // SPDX-License-Identifier: MIT |
2 | /* |
3 | * Copyright © 2023 Intel Corporation |
4 | */ |
5 | |
6 | #include <linux/fault-inject.h> |
7 | |
8 | #include <drm/drm_managed.h> |
9 | |
10 | #include "xe_device.h" |
11 | #include "xe_ggtt.h" |
12 | #include "xe_gt.h" |
13 | #include "xe_migrate.h" |
14 | #include "xe_pcode.h" |
15 | #include "xe_sa.h" |
16 | #include "xe_svm.h" |
17 | #include "xe_tile.h" |
18 | #include "xe_tile_sysfs.h" |
19 | #include "xe_ttm_vram_mgr.h" |
20 | #include "xe_wa.h" |
21 | |
22 | /** |
23 | * DOC: Multi-tile Design |
24 | * |
25 | * Different vendors use the term "tile" a bit differently, but in the Intel |
26 | * world, a 'tile' is pretty close to what most people would think of as being |
27 | * a complete GPU. When multiple GPUs are placed behind a single PCI device, |
28 | * that's what is referred to as a "multi-tile device." In such cases, pretty |
29 | * much all hardware is replicated per-tile, although certain responsibilities |
30 | * like PCI communication, reporting of interrupts to the OS, etc. are handled |
31 | * solely by the "root tile." A multi-tile platform takes care of tying the |
32 | * tiles together in a way such that interrupt notifications from remote tiles |
33 | * are forwarded to the root tile, the per-tile vram is combined into a single |
34 | * address space, etc. |
35 | * |
36 | * In contrast, a "GT" (which officially stands for "Graphics Technology") is |
37 | * the subset of a GPU/tile that is responsible for implementing graphics |
38 | * and/or media operations. The GT is where a lot of the driver implementation |
39 | * happens since it's where the hardware engines, the execution units, and the |
40 | * GuC all reside. |
41 | * |
42 | * Historically most Intel devices were single-tile devices that contained a |
43 | * single GT. PVC is an example of an Intel platform built on a multi-tile |
44 | * design (i.e., multiple GPUs behind a single PCI device); each PVC tile only |
45 | * has a single GT. In contrast, platforms like MTL that have separate chips |
46 | * for render and media IP are still only a single logical GPU, but the |
47 | * graphics and media IP blocks are each exposed as a separate GT within that |
48 | * single GPU. This is important from a software perspective because multi-GT |
49 | * platforms like MTL only replicate a subset of the GPU hardware and behave |
50 | * differently than multi-tile platforms like PVC where nearly everything is |
51 | * replicated. |
52 | * |
53 | * Per-tile functionality (shared by all GTs within the tile): |
54 | * - Complete 4MB MMIO space (containing SGunit/SoC registers, GT |
55 | * registers, display registers, etc.) |
56 | * - Global GTT |
57 | * - VRAM (if discrete) |
58 | * - Interrupt flows |
59 | * - Migration context |
60 | * - kernel batchbuffer pool |
61 | * - Primary GT |
62 | * - Media GT (if media version >= 13) |
63 | * |
64 | * Per-GT functionality: |
65 | * - GuC |
66 | * - Hardware engines |
67 | * - Programmable hardware units (subslices, EUs) |
68 | * - GSI subset of registers (multiple copies of these registers reside |
69 | * within the complete MMIO space provided by the tile, but at different |
70 | * offsets --- 0 for render, 0x380000 for media) |
71 | * - Multicast register steering |
72 | * - TLBs to cache page table translations |
73 | * - Reset capability |
74 | * - Low-level power management (e.g., C6) |
75 | * - Clock frequency |
76 | * - MOCS and PAT programming |
77 | */ |
78 | |
79 | /** |
80 | * xe_tile_alloc - Perform per-tile memory allocation |
81 | * @tile: Tile to perform allocations for |
82 | * |
83 | * Allocates various per-tile data structures using DRM-managed allocations. |
84 | * Does not touch the hardware. |
85 | * |
86 | * Returns -ENOMEM if allocations fail, otherwise 0. |
87 | */ |
88 | static int xe_tile_alloc(struct xe_tile *tile) |
89 | { |
90 | struct drm_device *drm = &tile_to_xe(tile)->drm; |
91 | |
92 | tile->mem.ggtt = drmm_kzalloc(dev: drm, size: sizeof(*tile->mem.ggtt), |
93 | GFP_KERNEL); |
94 | if (!tile->mem.ggtt) |
95 | return -ENOMEM; |
96 | tile->mem.ggtt->tile = tile; |
97 | |
98 | return 0; |
99 | } |
100 | |
101 | /** |
102 | * xe_tile_init_early - Initialize the tile and primary GT |
103 | * @tile: Tile to initialize |
104 | * @xe: Parent Xe device |
105 | * @id: Tile ID |
106 | * |
107 | * Initializes per-tile resources that don't require any interactions with the |
108 | * hardware or any knowledge about the Graphics/Media IP version. |
109 | * |
110 | * Returns: 0 on success, negative error code on error. |
111 | */ |
112 | int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id) |
113 | { |
114 | int err; |
115 | |
116 | tile->xe = xe; |
117 | tile->id = id; |
118 | |
119 | err = xe_tile_alloc(tile); |
120 | if (err) |
121 | return err; |
122 | |
123 | tile->primary_gt = xe_gt_alloc(tile); |
124 | if (IS_ERR(ptr: tile->primary_gt)) |
125 | return PTR_ERR(ptr: tile->primary_gt); |
126 | |
127 | xe_pcode_init(tile); |
128 | |
129 | return 0; |
130 | } |
131 | ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */ |
132 | |
133 | static int tile_ttm_mgr_init(struct xe_tile *tile) |
134 | { |
135 | struct xe_device *xe = tile_to_xe(tile); |
136 | int err; |
137 | |
138 | if (tile->mem.vram.usable_size) { |
139 | err = xe_ttm_vram_mgr_init(tile, mgr: &tile->mem.vram.ttm); |
140 | if (err) |
141 | return err; |
142 | xe->info.mem_region_mask |= BIT(tile->id) << 1; |
143 | } |
144 | |
145 | return 0; |
146 | } |
147 | |
148 | /** |
149 | * xe_tile_init_noalloc - Init tile up to the point where allocations can happen. |
150 | * @tile: The tile to initialize. |
151 | * |
152 | * This function prepares the tile to allow memory allocations to VRAM, but is |
153 | * not allowed to allocate memory itself. This state is useful for display |
154 | * readout, because the inherited display framebuffer will otherwise be |
155 | * overwritten as it is usually put at the start of VRAM. |
156 | * |
157 | * Note that since this is tile initialization, it should not perform any |
158 | * GT-specific operations, and thus does not need to hold GT forcewake. |
159 | * |
160 | * Returns: 0 on success, negative error code on error. |
161 | */ |
162 | int xe_tile_init_noalloc(struct xe_tile *tile) |
163 | { |
164 | struct xe_device *xe = tile_to_xe(tile); |
165 | int err; |
166 | |
167 | err = tile_ttm_mgr_init(tile); |
168 | if (err) |
169 | return err; |
170 | |
171 | xe_wa_apply_tile_workarounds(tile); |
172 | |
173 | if (xe->info.has_usm && IS_DGFX(xe)) |
174 | xe_devm_add(tile, vr: &tile->mem.vram); |
175 | |
176 | return xe_tile_sysfs_init(tile); |
177 | } |
178 | |
179 | int xe_tile_init(struct xe_tile *tile) |
180 | { |
181 | tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, align: 16); |
182 | if (IS_ERR(ptr: tile->mem.kernel_bb_pool)) |
183 | return PTR_ERR(ptr: tile->mem.kernel_bb_pool); |
184 | |
185 | return 0; |
186 | } |
187 | void xe_tile_migrate_wait(struct xe_tile *tile) |
188 | { |
189 | xe_migrate_wait(m: tile->migrate); |
190 | } |
191 | |