/*
 * Copyright 2008 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 *          Alex Deucher
 *          Jerome Glisse
 */

#include <linux/aperture.h>
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/pci.h>
#include <linux/pci-p2pdma.h>
#include <linux/apple-gmux.h>

#include <drm/drm_atomic_helper.h>
#include <drm/drm_client_event.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/device.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
#include <linux/efi.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_i2c.h"
#include "atom.h"
#include "amdgpu_atombios.h"
#include "amdgpu_atomfirmware.h"
#include "amd_pcie.h"
#ifdef CONFIG_DRM_AMDGPU_SI
#include "si.h"
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
#include "cik.h"
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/firmware.h>
#include "amdgpu_vf_error.h"

#include "amdgpu_amdkfd.h"
#include "amdgpu_pm.h"

#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
#include "amdgpu_fru_eeprom.h"
#include "amdgpu_reset.h"
#include "amdgpu_virt.h"
#include "amdgpu_dev_coredump.h"

#include <linux/suspend.h>
#include <drm/task_barrier.h>
#include <linux/pm_runtime.h>

#include <drm/drm_drv.h>

#if IS_ENABLED(CONFIG_X86)
#include <asm/intel-family.h>
#include <asm/cpu_device_id.h>
#endif

MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");

#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

#define AMDGPU_VBIOS_SKIP (1U << 0)
#define AMDGPU_VBIOS_OPTIONAL (1U << 1)

static const struct drm_driver amdgpu_kms_driver;

const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};

#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
/*
 * Default init level where all blocks are expected to be initialized. This is
 * the level of initialization expected by default and also after a full reset
 * of the device.
 */
struct amdgpu_init_level amdgpu_init_default = {
	.level = AMDGPU_INIT_LEVEL_DEFAULT,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};

struct amdgpu_init_level amdgpu_init_recovery = {
	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
};

/*
 * Minimal blocks needed to be initialized before a XGMI hive can be reset. This
 * is used for cases like reset on initialization where the entire hive needs to
 * be reset before first use.
 */
struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
	.hwini_ip_block_mask =
		BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) |
		BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) |
		BIT(AMD_IP_BLOCK_TYPE_PSP)
};

static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
					     enum amd_ip_block_type block)
{
	return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0;
}
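
/*
 * Illustrative example (not used by the driver itself): with the minimal XGMI
 * init level selected, only the GMC, SMC, COMMON, IH and PSP bits are set in
 * hwini_ip_block_mask, so a query such as
 *
 *	amdgpu_ip_member_of_hwini(adev, AMD_IP_BLOCK_TYPE_GFX)
 *
 * returns false, while the same query for AMD_IP_BLOCK_TYPE_PSP returns true.
 */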

void amdgpu_set_init_level(struct amdgpu_device *adev,
			   enum amdgpu_init_lvl_id lvl)
{
	switch (lvl) {
	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
		adev->init_lvl = &amdgpu_init_minimal_xgmi;
		break;
	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
		adev->init_lvl = &amdgpu_init_recovery;
		break;
	case AMDGPU_INIT_LEVEL_DEFAULT:
		fallthrough;
	default:
		adev->init_lvl = &amdgpu_init_default;
		break;
	}
}

static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
				     void *data);

/**
 * DOC: pcie_replay_count
 *
 * The amdgpu driver provides a sysfs API for reporting the total number
 * of PCIe replays (NAKs).
 * The file pcie_replay_count is used for this and returns the total
 * number of replays as a sum of the NAKs generated and NAKs received.
 */
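
/*
 * Illustrative example (sysfs path assumed, not defined by this file): the
 * attribute is created on the PCI device, so userspace would typically read
 * it with something like
 *
 *	cat /sys/bus/pci/devices/<domain:bus:dev.fn>/pcie_replay_count
 */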

static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);

	return sysfs_emit(buf, "%llu\n", cnt);
}

static DEVICE_ATTR(pcie_replay_count, 0444,
		   amdgpu_device_get_pcie_replay_count, NULL);

static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
{
	int ret = 0;

	if (!amdgpu_sriov_vf(adev))
		ret = sysfs_create_file(&adev->dev->kobj,
					&dev_attr_pcie_replay_count.attr);

	return ret;
}

static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
{
	if (!amdgpu_sriov_vf(adev))
		sysfs_remove_file(&adev->dev->kobj,
				  &dev_attr_pcie_replay_count.attr);
}

static ssize_t amdgpu_sysfs_reg_state_get(struct file *f, struct kobject *kobj,
					  const struct bin_attribute *attr, char *buf,
					  loff_t ppos, size_t count)
{
	struct device *dev = kobj_to_dev(kobj);
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	ssize_t bytes_read;

	switch (ppos) {
	case AMDGPU_SYS_REG_STATE_XGMI:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_WAFL:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_PCIE:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_USR:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
		break;
	case AMDGPU_SYS_REG_STATE_USR_1:
		bytes_read = amdgpu_asic_get_reg_state(
			adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
		break;
	default:
		return -EINVAL;
	}

	return bytes_read;
}

static const BIN_ATTR(reg_state, 0444, amdgpu_sysfs_reg_state_get, NULL,
		      AMDGPU_SYS_REG_STATE_END);

int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
{
	int ret;

	if (!amdgpu_asic_get_reg_state_supported(adev))
		return 0;

	ret = sysfs_create_bin_file(&adev->dev->kobj, &bin_attr_reg_state);

	return ret;
}

void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
{
	if (!amdgpu_asic_get_reg_state_supported(adev))
		return;
	sysfs_remove_bin_file(&adev->dev->kobj, &bin_attr_reg_state);
}

int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
{
	int r;

	if (ip_block->version->funcs->suspend) {
		r = ip_block->version->funcs->suspend(ip_block);
		if (r) {
			dev_err(ip_block->adev->dev,
				"suspend of IP block <%s> failed %d\n",
				ip_block->version->funcs->name, r);
			return r;
		}
	}

	ip_block->status.hw = false;
	return 0;
}

int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
{
	int r;

	if (ip_block->version->funcs->resume) {
		r = ip_block->version->funcs->resume(ip_block);
		if (r) {
			dev_err(ip_block->adev->dev,
				"resume of IP block <%s> failed %d\n",
				ip_block->version->funcs->name, r);
			return r;
		}
	}

	ip_block->status.hw = true;
	return 0;
}
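
/*
 * Illustrative sketch (not part of the driver): callers typically walk the IP
 * block array and use the helpers above roughly like
 *
 *	for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
 *		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]);
 *		if (r)
 *			return r;
 *	}
 *
 * which also keeps ip_blocks[i].status.hw in sync with the hardware state.
 */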

/**
 * DOC: board_info
 *
 * The amdgpu driver provides a sysfs API for giving board related information.
 * It provides the form factor information in the format
 *
 *   type : form factor
 *
 * Possible form factor values
 *
 * - "cem"     - PCIE CEM card
 * - "oam"     - Open Compute Accelerator Module
 * - "unknown" - Not known
 *
 */
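
/*
 * Illustrative example (format taken from the handler below): reading the
 * board_info attribute of an OAM board yields a single line such as
 *
 *	type : oam
 */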

static ssize_t amdgpu_device_get_board_info(struct device *dev,
					    struct device_attribute *attr,
					    char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);
	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
	const char *pkg;

	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
		pkg_type = adev->smuio.funcs->get_pkg_type(adev);

	switch (pkg_type) {
	case AMDGPU_PKG_TYPE_CEM:
		pkg = "cem";
		break;
	case AMDGPU_PKG_TYPE_OAM:
		pkg = "oam";
		break;
	default:
		pkg = "unknown";
		break;
	}

	return sysfs_emit(buf, "%s : %s\n", "type", pkg);
}

static DEVICE_ATTR(board_info, 0444, amdgpu_device_get_board_info, NULL);

static struct attribute *amdgpu_board_attrs[] = {
	&dev_attr_board_info.attr,
	NULL,
};

static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
					     struct attribute *attr, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	if (adev->flags & AMD_IS_APU)
		return 0;

	return attr->mode;
}

static const struct attribute_group amdgpu_board_attrs_group = {
	.attrs = amdgpu_board_attrs,
	.is_visible = amdgpu_board_attrs_is_visible
};

static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);


/**
 * amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ATPX power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_px(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
		return true;
	return false;
}

/**
 * amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with ACPI power control,
 * otherwise returns false.
 */
bool amdgpu_device_supports_boco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
		return false;

	if (adev->has_pr3 ||
	    ((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
		return true;
	return false;
}

/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (only works if BACO is supported);
 * otherwise return 0.
 */
int amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}

void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
{
	struct drm_device *dev;
	int bamaco_support;

	dev = adev_to_drm(adev);

	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
	bamaco_support = amdgpu_device_supports_baco(dev);

	switch (amdgpu_runtime_pm) {
	case 2:
		if (bamaco_support & MACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
			dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
		} else if (bamaco_support == BACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
			dev_info(adev->dev, "Requested mode BAMACO not available, falling back to BACO\n");
		}
		break;
	case 1:
		if (bamaco_support & BACO_SUPPORT) {
			adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
			dev_info(adev->dev, "Forcing BACO for runtime pm\n");
		}
		break;
	case -1:
	case -2:
		if (amdgpu_device_supports_px(dev)) { /* enable PX as runtime mode */
			adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
			dev_info(adev->dev, "Using ATPX for runtime pm\n");
		} else if (amdgpu_device_supports_boco(dev)) { /* enable boco as runtime mode */
			adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
			dev_info(adev->dev, "Using BOCO for runtime pm\n");
		} else {
			if (!bamaco_support)
				goto no_runtime_pm;

			switch (adev->asic_type) {
			case CHIP_VEGA20:
			case CHIP_ARCTURUS:
				/* BACO is not supported on vega20 and arcturus */
				break;
			case CHIP_VEGA10:
				/* enable BACO as runpm mode if noretry=0 */
				if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
				break;
			default:
				/* enable BACO as runpm mode on CI+ */
				if (!amdgpu_passthrough(adev))
					adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
				break;
			}

			if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
				if (bamaco_support & MACO_SUPPORT) {
					adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
					dev_info(adev->dev, "Using BAMACO for runtime pm\n");
				} else {
					dev_info(adev->dev, "Using BACO for runtime pm\n");
				}
			}
		}
		break;
	case 0:
		dev_info(adev->dev, "runtime pm is manually disabled\n");
		break;
	default:
		break;
	}

no_runtime_pm:
	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
		dev_info(adev->dev, "Runtime PM not available\n");
}
/**
 * amdgpu_device_supports_smart_shift - Is the device a dGPU with
 * smart shift support
 *
 * @dev: drm_device pointer
 *
 * Returns true if the device is a dGPU with Smart Shift support,
 * otherwise returns false.
 */
bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
{
	return (amdgpu_device_supports_boco(dev) &&
		amdgpu_acpi_is_power_shift_control_supported());
}

/*
 * VRAM access helper functions
 */

/**
 * amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
			     void *buf, size_t size, bool write)
{
	unsigned long flags;
	uint32_t hi = ~0, tmp = 0;
	uint32_t *data = buf;
	uint64_t last;
	int idx;

	if (!drm_dev_enter(adev_to_drm(adev), &idx))
		return;

	BUG_ON(!IS_ALIGNED(pos, 4) || !IS_ALIGNED(size, 4));

	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
	for (last = pos + size; pos < last; pos += 4) {
		tmp = pos >> 31;

		WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
		if (tmp != hi) {
			WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
			hi = tmp;
		}
		if (write)
			WREG32_NO_KIQ(mmMM_DATA, *data++);
		else
			*data++ = RREG32_NO_KIQ(mmMM_DATA);
	}

	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
	drm_dev_exit(idx);
}

/**
 * amdgpu_device_aper_access - access vram by vram aperture
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 *
 * The return value means how many bytes have been transferred.
 */
size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
				 void *buf, size_t size, bool write)
{
#ifdef CONFIG_64BIT
	void __iomem *addr;
	size_t count = 0;
	uint64_t last;

	if (!adev->mman.aper_base_kaddr)
		return 0;

	last = min(pos + size, adev->gmc.visible_vram_size);
	if (last > pos) {
		addr = adev->mman.aper_base_kaddr + pos;
		count = last - pos;

		if (write) {
			memcpy_toio(addr, buf, count);
			/* Make sure HDP write cache flush happens without any reordering
			 * after the system memory contents are sent over PCIe device
			 */
			mb();
			amdgpu_device_flush_hdp(adev, NULL);
		} else {
			amdgpu_device_invalidate_hdp(adev, NULL);
			/* Make sure HDP read cache is invalidated before issuing a read
			 * to the PCIe device
			 */
			mb();
			memcpy_fromio(buf, addr, count);
		}

	}

	return count;
#else
	return 0;
#endif
}

/**
 * amdgpu_device_vram_access - read/write a buffer in vram
 *
 * @adev: amdgpu_device pointer
 * @pos: offset of the buffer in vram
 * @buf: virtual address of the buffer in system memory
 * @size: read/write size, sizeof(@buf) must be > @size
 * @write: true - write to vram, otherwise - read from vram
 */
void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
			       void *buf, size_t size, bool write)
{
	size_t count;

	/* try using the VRAM aperture to access VRAM first */
	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
	size -= count;
	if (size) {
		/* use MM_INDEX/MM_DATA to access the rest of VRAM */
		pos += count;
		buf += count;
		amdgpu_device_mm_access(adev, pos, buf, size, write);
	}
}
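
/*
 * Illustrative example (not part of the driver): read the first dword of VRAM
 * into a stack variable using the combined helper above:
 *
 *	uint32_t val;
 *
 *	amdgpu_device_vram_access(adev, 0, &val, sizeof(val), false);
 */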

/*
 * register access helper functions.
 */

/* Check if hw access should be skipped because of hotplug or device error */
bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
{
	if (adev->no_hw_access)
		return true;

#ifdef CONFIG_LOCKDEP
	/*
	 * This is a bit complicated to understand, so worth a comment. What we assert
	 * here is that the GPU reset is not running on another thread in parallel.
	 *
	 * For this we trylock the read side of the reset semaphore, if that succeeds
	 * we know that the reset is not running in parallel.
	 *
	 * If the trylock fails we assert that we are either already holding the read
	 * side of the lock or are the reset thread itself and hold the write side of
	 * the lock.
	 */
	if (in_task()) {
		if (down_read_trylock(&adev->reset_domain->sem))
			up_read(&adev->reset_domain->sem);
		else
			lockdep_assert_held(&adev->reset_domain->sem);
	}
#endif
	return false;
}

/**
 * amdgpu_device_rreg - read a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t acc_flags)
{
	uint32_t ret;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg, 0);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	trace_amdgpu_device_rreg(adev->pdev->device, reg, ret);

	return ret;
}

/*
 * MMIO register read with bytes helper functions
 * @offset: bytes offset from MMIO start
 */

/**
 * amdgpu_mm_rreg8 - read a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 *
 * Returns the 8 bit value from the offset specified.
 */
uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
{
	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if (offset < adev->rmmio_size)
		return (readb(adev->rmmio + offset));
	BUG();
}


/**
 * amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Returns the 32 bit value from the offset specified.
 */
uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
				uint32_t reg, uint32_t acc_flags,
				uint32_t xcc_id)
{
	uint32_t ret, rlcg_flag;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	if ((reg * 4) < adev->rmmio_size) {
		if (amdgpu_sriov_vf(adev) &&
		    !amdgpu_sriov_runtime(adev) &&
		    adev->gfx.rlc.rlcg_reg_access_supported &&
		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
							 GC_HWIP, false,
							 &rlcg_flag)) {
			ret = amdgpu_virt_rlcg_reg_rw(adev, reg, 0, rlcg_flag, GET_INST(GC, xcc_id));
		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
			   amdgpu_sriov_runtime(adev) &&
			   down_read_trylock(&adev->reset_domain->sem)) {
			ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
			up_read(&adev->reset_domain->sem);
		} else {
			ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		ret = adev->pcie_rreg(adev, reg * 4);
	}

	return ret;
}

/*
 * MMIO register write with bytes helper functions
 * @offset: bytes offset from MMIO start
 * @value: the value to be written to the register
 */

/**
 * amdgpu_mm_wreg8 - write a memory mapped IO register
 *
 * @adev: amdgpu_device pointer
 * @offset: byte aligned register offset
 * @value: 8 bit value to write
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (offset < adev->rmmio_size)
		writeb(value, adev->rmmio + offset);
	else
		BUG();
}

/**
 * amdgpu_device_wreg - write to a memory mapped IO or indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_wreg(struct amdgpu_device *adev,
			uint32_t reg, uint32_t v,
			uint32_t acc_flags)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
		    amdgpu_sriov_runtime(adev) &&
		    down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v, 0);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}

	trace_amdgpu_device_wreg(adev->pdev->device, reg, v);
}

/**
 * amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
 *
 * @adev: amdgpu_device pointer
 * @reg: mmio/rlc register
 * @v: value to write
 * @xcc_id: xcc accelerated compute core id
 *
 * this function is invoked only for the debugfs register access
 */
void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
			     uint32_t reg, uint32_t v,
			     uint32_t xcc_id)
{
	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (amdgpu_sriov_fullaccess(adev) &&
	    adev->gfx.rlc.funcs &&
	    adev->gfx.rlc.funcs->is_rlcg_access_range) {
		if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
			return amdgpu_sriov_wreg(adev, reg, v, 0, 0, xcc_id);
	} else if ((reg * 4) >= adev->rmmio_size) {
		adev->pcie_wreg(adev, reg * 4, v);
	} else {
		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
	}
}

/**
 * amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
 *
 * @adev: amdgpu_device pointer
 * @reg: dword aligned register offset
 * @v: 32 bit value to write to the register
 * @acc_flags: access flags which require special behavior
 * @xcc_id: xcc accelerated compute core id
 *
 * Writes the value specified to the offset specified.
 */
void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
			    uint32_t reg, uint32_t v,
			    uint32_t acc_flags, uint32_t xcc_id)
{
	uint32_t rlcg_flag;

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if ((reg * 4) < adev->rmmio_size) {
		if (amdgpu_sriov_vf(adev) &&
		    !amdgpu_sriov_runtime(adev) &&
		    adev->gfx.rlc.rlcg_reg_access_supported &&
		    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
							 GC_HWIP, true,
							 &rlcg_flag)) {
			amdgpu_virt_rlcg_reg_rw(adev, reg, v, rlcg_flag, GET_INST(GC, xcc_id));
		} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
			   amdgpu_sriov_runtime(adev) &&
			   down_read_trylock(&adev->reset_domain->sem)) {
			amdgpu_kiq_wreg(adev, reg, v, xcc_id);
			up_read(&adev->reset_domain->sem);
		} else {
			writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
		}
	} else {
		adev->pcie_wreg(adev, reg * 4, v);
	}
}

/**
 * amdgpu_device_indirect_rreg - read an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
				u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u32 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}
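
/*
 * Note on the index/data pattern used by these indirect helpers: the register
 * address is first written to the PCIE index register, then the value is
 * transferred through the PCIE data register. The readl() issued right after
 * each writel() flushes the posted MMIO write so the index and data accesses
 * cannot be reordered (this explanation reflects the usual posted-write
 * semantics and is not stated elsewhere in the file).
 */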

u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
				    u64 reg_addr)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	u32 r;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	if (unlikely(!adev->nbio.funcs)) {
		pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
		pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
	} else {
		pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
		pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	}

	if (reg_addr >> 32) {
		if (unlikely(!adev->nbio.funcs))
			pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
		else
			pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	} else {
		pcie_index_hi = 0;
	}

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r = readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_rreg64 - read a 64 bit indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register address to read from
 *
 * Returns the value of indirect register @reg_addr
 */
u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
				  u32 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	r |= ((u64)readl(pcie_data_offset) << 32);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
				      u64 reg_addr)
{
	unsigned long flags, pcie_index, pcie_data;
	unsigned long pcie_index_hi = 0;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;
	u64 r;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
			pcie_index_hi * 4;

	/* read low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r = readl(pcie_data_offset);
	/* read high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	r |= ((u64)readl(pcie_data_offset) << 32);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

	return r;
}

/**
 * amdgpu_device_indirect_wreg - write an indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
				 u32 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
				     u64 reg_addr, u32 reg_data)
{
	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
	else
		pcie_index_hi = 0;

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel(reg_data, pcie_data_offset);
	readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_indirect_wreg64 - write a 64 bit indirect register
 *
 * @adev: amdgpu_device pointer
 * @reg_addr: indirect register offset
 * @reg_data: indirect register data
 *
 */
void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
				   u32 reg_addr, u64 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);
	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
				       u64 reg_addr, u64 reg_data)
{
	unsigned long flags, pcie_index, pcie_data;
	unsigned long pcie_index_hi = 0;
	void __iomem *pcie_index_offset;
	void __iomem *pcie_index_hi_offset;
	void __iomem *pcie_data_offset;

	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
	if ((reg_addr >> 32) && (adev->nbio.funcs->get_pcie_index_hi_offset))
		pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);

	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
	pcie_index_offset = (void __iomem *)adev->rmmio + pcie_index * 4;
	pcie_data_offset = (void __iomem *)adev->rmmio + pcie_data * 4;
	if (pcie_index_hi != 0)
		pcie_index_hi_offset = (void __iomem *)adev->rmmio +
				pcie_index_hi * 4;

	/* write low 32 bits */
	writel(reg_addr, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel((u32)(reg_data & 0xffffffffULL), pcie_data_offset);
	readl(pcie_data_offset);
	/* write high 32 bits */
	writel(reg_addr + 4, pcie_index_offset);
	readl(pcie_index_offset);
	if (pcie_index_hi != 0) {
		writel((reg_addr >> 32) & 0xff, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}
	writel((u32)(reg_data >> 32), pcie_data_offset);
	readl(pcie_data_offset);

	/* clear the high bits */
	if (pcie_index_hi != 0) {
		writel(0, pcie_index_hi_offset);
		readl(pcie_index_hi_offset);
	}

	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
}

/**
 * amdgpu_device_get_rev_id - query device rev_id
 *
 * @adev: amdgpu_device pointer
 *
 * Return device rev_id
 */
u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
{
	return adev->nbio.funcs->get_rev_id(adev);
}

/**
 * amdgpu_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
	BUG();
	return 0;
}

static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
		  reg, v);
	BUG();
}

static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
{
	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_invalid_rreg64 - dummy 64 bit reg read function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
	BUG();
	return 0;
}

static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
	BUG();
	return 0;
}

/**
 * amdgpu_invalid_wreg64 - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
		  reg, v);
	BUG();
}

static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
{
	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
		  reg, v);
	BUG();
}

/**
 * amdgpu_block_invalid_rreg - dummy reg read function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 *
 * Dummy register read function. Used for register blocks
 * that certain asics don't have (all asics).
 * Returns the value in the register.
 */
static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
					  uint32_t block, uint32_t reg)
{
	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
		  reg, block);
	BUG();
	return 0;
}

/**
 * amdgpu_block_invalid_wreg - dummy reg write function
 *
 * @adev: amdgpu_device pointer
 * @block: offset of instance
 * @reg: offset of register
 * @v: value to write to the register
 *
 * Dummy register write function. Used for register blocks
 * that certain asics don't have (all asics).
 */
static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
				      uint32_t block,
				      uint32_t reg, uint32_t v)
{
	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
		  reg, block, v);
	BUG();
}

static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
{
	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
		return AMDGPU_VBIOS_SKIP;

	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
		return AMDGPU_VBIOS_OPTIONAL;

	return 0;
}

/**
 * amdgpu_device_asic_init - Wrapper for atom asic_init
 *
 * @adev: amdgpu_device pointer
 *
 * Does any asic specific work and then calls atom asic init.
 */
static int amdgpu_device_asic_init(struct amdgpu_device *adev)
{
	uint32_t flags;
	bool optional;
	int ret;

	amdgpu_asic_pre_asic_init(adev);
	flags = amdgpu_device_get_vbios_flags(adev);
	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL | AMDGPU_VBIOS_SKIP));

	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 5, 0) ||
	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(11, 0, 0)) {
		amdgpu_psp_wait_for_bootloader(adev);
		if (optional && !adev->bios)
			return 0;

		ret = amdgpu_atomfirmware_asic_init(adev, true);
		return ret;
	} else {
		if (optional && !adev->bios)
			return 0;

		return amdgpu_atom_asic_init(adev->mode_info.atom_context);
	}

	return 0;
}

/**
 * amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Allocates a scratch page of VRAM for use by various things in the
 * driver.
 */
static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
{
	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
				       AMDGPU_GEM_DOMAIN_VRAM |
				       AMDGPU_GEM_DOMAIN_GTT,
				       &adev->mem_scratch.robj,
				       &adev->mem_scratch.gpu_addr,
				       (void **)&adev->mem_scratch.ptr);
}

/**
 * amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
 *
 * @adev: amdgpu_device pointer
 *
 * Frees the VRAM scratch page.
 */
static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
{
	amdgpu_bo_free_kernel(&adev->mem_scratch.robj, NULL, NULL);
}

/**
 * amdgpu_device_program_register_sequence - program an array of registers.
 *
 * @adev: amdgpu_device pointer
 * @registers: pointer to the register array
 * @array_size: size of the register array
 *
 * Programs an array of registers with AND and OR masks.
 * This is a helper for setting golden registers.
 */
void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
					     const u32 *registers,
					     const u32 array_size)
{
	u32 tmp, reg, and_mask, or_mask;
	int i;

	if (array_size % 3)
		return;

	for (i = 0; i < array_size; i += 3) {
		reg = registers[i + 0];
		and_mask = registers[i + 1];
		or_mask = registers[i + 2];

		if (and_mask == 0xffffffff) {
			tmp = or_mask;
		} else {
			tmp = RREG32(reg);
			tmp &= ~and_mask;
			if (adev->family >= AMDGPU_FAMILY_AI)
				tmp |= (or_mask & and_mask);
			else
				tmp |= or_mask;
		}
		WREG32(reg, tmp);
	}
}
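
/*
 * Illustrative example (register name is a placeholder, not a real define):
 * the array is consumed as { offset, and_mask, or_mask } triplets, e.g.
 *
 *	static const u32 golden_settings[] = {
 *		mmSOME_REG, 0x0000000f, 0x00000001,
 *	};
 *
 *	amdgpu_device_program_register_sequence(adev, golden_settings,
 *						ARRAY_SIZE(golden_settings));
 */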

/**
 * amdgpu_device_pci_config_reset - reset the GPU
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using the pci config reset sequence.
 * Only applicable to asics prior to vega10.
 */
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
{
	pci_write_config_dword(adev->pdev, 0x7c, AMDGPU_ASIC_RESET_DATA);
}

/**
 * amdgpu_device_pci_reset - reset the GPU using generic PCI means
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
 */
int amdgpu_device_pci_reset(struct amdgpu_device *adev)
{
	return pci_reset_function(adev->pdev);
}

/*
 * amdgpu_device_wb_*()
 * Writeback is the method by which the GPU updates special pages in memory
 * with the status of certain GPU events (fences, ring pointers, etc.).
 */
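
/*
 * Illustrative sketch (not part of the driver): a writeback user allocates a
 * slot with the helpers below, lets the GPU update it, and releases it again:
 *
 *	u32 wb;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb)) {
 *		... GPU writes status to adev->wb.gpu_addr + wb * 4,
 *		    CPU reads it back from adev->wb.wb[wb] ...
 *		amdgpu_device_wb_free(adev, wb);
 *	}
 */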
1562 | |
1563 | /** |
1564 | * amdgpu_device_wb_fini - Disable Writeback and free memory |
1565 | * |
1566 | * @adev: amdgpu_device pointer |
1567 | * |
1568 | * Disables Writeback and frees the Writeback memory (all asics). |
1569 | * Used at driver shutdown. |
1570 | */ |
1571 | static void amdgpu_device_wb_fini(struct amdgpu_device *adev) |
1572 | { |
1573 | if (adev->wb.wb_obj) { |
1574 | amdgpu_bo_free_kernel(bo: &adev->wb.wb_obj, |
1575 | gpu_addr: &adev->wb.gpu_addr, |
1576 | cpu_addr: (void **)&adev->wb.wb); |
1577 | adev->wb.wb_obj = NULL; |
1578 | } |
1579 | } |
1580 | |
1581 | /** |
1582 | * amdgpu_device_wb_init - Init Writeback driver info and allocate memory |
1583 | * |
1584 | * @adev: amdgpu_device pointer |
1585 | * |
1586 | * Initializes writeback and allocates writeback memory (all asics). |
1587 | * Used at driver startup. |
1588 | * Returns 0 on success or an -error on failure. |
1589 | */ |
1590 | static int amdgpu_device_wb_init(struct amdgpu_device *adev) |
1591 | { |
1592 | int r; |
1593 | |
1594 | if (adev->wb.wb_obj == NULL) { |
1595 | /* AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots */ |
1596 | r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * 8, |
1597 | PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, |
1598 | bo_ptr: &adev->wb.wb_obj, gpu_addr: &adev->wb.gpu_addr, |
1599 | cpu_addr: (void **)&adev->wb.wb); |
1600 | if (r) { |
1601 | dev_warn(adev->dev, "(%d) create WB bo failed\n", r); |
1602 | return r; |
1603 | } |
1604 | |
1605 | adev->wb.num_wb = AMDGPU_MAX_WB; |
1606 | memset(&adev->wb.used, 0, sizeof(adev->wb.used)); |
1607 | |
1608 | /* clear wb memory */ |
1609 | memset((char *)adev->wb.wb, 0, AMDGPU_MAX_WB * sizeof(uint32_t) * 8); |
1610 | } |
1611 | |
1612 | return 0; |
1613 | } |
1614 | |
1615 | /** |
1616 | * amdgpu_device_wb_get - Allocate a wb entry |
1617 | * |
1618 | * @adev: amdgpu_device pointer |
1619 | * @wb: wb index |
1620 | * |
1621 | * Allocate a wb slot for use by the driver (all asics). |
1622 | * Returns 0 on success or -EINVAL on failure. |
1623 | */ |
1624 | int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb) |
1625 | { |
1626 | unsigned long flags, offset; |
1627 | |
1628 | spin_lock_irqsave(&adev->wb.lock, flags); |
1629 | offset = find_first_zero_bit(addr: adev->wb.used, size: adev->wb.num_wb); |
1630 | if (offset < adev->wb.num_wb) { |
1631 | __set_bit(offset, adev->wb.used); |
1632 | spin_unlock_irqrestore(lock: &adev->wb.lock, flags); |
1633 | *wb = offset << 3; /* convert to dw offset */ |
1634 | return 0; |
1635 | } else { |
1636 | spin_unlock_irqrestore(lock: &adev->wb.lock, flags); |
1637 | return -EINVAL; |
1638 | } |
1639 | } |
1640 | |
1641 | /** |
1642 | * amdgpu_device_wb_free - Free a wb entry |
1643 | * |
1644 | * @adev: amdgpu_device pointer |
1645 | * @wb: wb index |
1646 | * |
1647 | * Free a wb slot allocated for use by the driver (all asics) |
1648 | */ |
1649 | void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) |
1650 | { |
1651 | unsigned long flags; |
1652 | |
1653 | wb >>= 3; |
1654 | spin_lock_irqsave(&adev->wb.lock, flags); |
1655 | if (wb < adev->wb.num_wb) |
1656 | __clear_bit(wb, adev->wb.used); |
	spin_unlock_irqrestore(&adev->wb.lock, flags);
1658 | } |
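
/*
 * Example (illustrative sketch, not an actual call site in this file): the
 * index returned by amdgpu_device_wb_get() is a dword offset, so the slot can
 * be read on the CPU through adev->wb.wb[] and addressed by the GPU relative
 * to adev->wb.gpu_addr. Variable names below are hypothetical.
 *
 *	u32 wb_idx;
 *	u64 wb_gpu_addr;
 *
 *	if (!amdgpu_device_wb_get(adev, &wb_idx)) {
 *		wb_gpu_addr = adev->wb.gpu_addr + wb_idx * 4;
 *		// let the GPU write status to wb_gpu_addr, read it back
 *		// on the CPU via adev->wb.wb[wb_idx]
 *		amdgpu_device_wb_free(adev, wb_idx);
 *	}
 */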
1659 | |
1660 | /** |
1661 | * amdgpu_device_resize_fb_bar - try to resize FB BAR |
1662 | * |
1663 | * @adev: amdgpu_device pointer |
1664 | * |
1665 | * Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not |
 * to fail, but if any of the BARs is not accessible after the resize we abort
1667 | * driver loading by returning -ENODEV. |
1668 | */ |
1669 | int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev) |
1670 | { |
	int rbar_size = pci_rebar_bytes_to_size(adev->gmc.real_vram_size);
1672 | struct pci_bus *root; |
1673 | struct resource *res; |
1674 | unsigned int i; |
1675 | u16 cmd; |
1676 | int r; |
1677 | |
1678 | if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT)) |
1679 | return 0; |
1680 | |
1681 | /* Bypass for VF */ |
1682 | if (amdgpu_sriov_vf(adev)) |
1683 | return 0; |
1684 | |
1685 | if (!amdgpu_rebar) |
1686 | return 0; |
1687 | |
1688 | /* resizing on Dell G5 SE platforms causes problems with runtime pm */ |
1689 | if ((amdgpu_runtime_pm != 0) && |
1690 | adev->pdev->vendor == PCI_VENDOR_ID_ATI && |
1691 | adev->pdev->device == 0x731f && |
1692 | adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL) |
1693 | return 0; |
1694 | |
1695 | /* PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 */ |
	if (!pci_find_ext_capability(adev->pdev, PCI_EXT_CAP_ID_VNDR))
1697 | DRM_WARN("System can't access extended configuration space, please check!!\n"); |
1698 | |
1699 | /* skip if the bios has already enabled large BAR */ |
1700 | if (adev->gmc.real_vram_size && |
1701 | (pci_resource_len(adev->pdev, 0) >= adev->gmc.real_vram_size)) |
1702 | return 0; |
1703 | |
1704 | /* Check if the root BUS has 64bit memory resources */ |
1705 | root = adev->pdev->bus; |
1706 | while (root->parent) |
1707 | root = root->parent; |
1708 | |
1709 | pci_bus_for_each_resource(root, res, i) { |
1710 | if (res && res->flags & (IORESOURCE_MEM | IORESOURCE_MEM_64) && |
1711 | res->start > 0x100000000ull) |
1712 | break; |
1713 | } |
1714 | |
1715 | /* Trying to resize is pointless without a root hub window above 4GB */ |
1716 | if (!res) |
1717 | return 0; |
1718 | |
1719 | /* Limit the BAR size to what is available */ |
1720 | rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, 0)) - 1, |
1721 | rbar_size); |
1722 | |
1723 | /* Disable memory decoding while we change the BAR addresses and size */ |
	pci_read_config_word(adev->pdev, PCI_COMMAND, &cmd);
	pci_write_config_word(adev->pdev, PCI_COMMAND,
			      cmd & ~PCI_COMMAND_MEMORY);
1727 | |
1728 | /* Free the VRAM and doorbell BAR, we most likely need to move both. */ |
1729 | amdgpu_doorbell_fini(adev); |
1730 | if (adev->asic_type >= CHIP_BONAIRE) |
		pci_release_resource(adev->pdev, 2);
1732 | |
	pci_release_resource(adev->pdev, 0);
1734 | |
	r = pci_resize_resource(adev->pdev, 0, rbar_size);
1736 | if (r == -ENOSPC) |
1737 | DRM_INFO("Not enough PCI address space for a large BAR."); |
1738 | else if (r && r != -ENOTSUPP) |
1739 | DRM_ERROR("Problem resizing BAR0 (%d).", r); |
1740 | |
	pci_assign_unassigned_bus_resources(adev->pdev->bus);
1742 | |
1743 | /* When the doorbell or fb BAR isn't available we have no chance of |
1744 | * using the device. |
1745 | */ |
1746 | r = amdgpu_doorbell_init(adev); |
1747 | if (r || (pci_resource_flags(adev->pdev, 0) & IORESOURCE_UNSET)) |
1748 | return -ENODEV; |
1749 | |
	pci_write_config_word(adev->pdev, PCI_COMMAND, cmd);
1751 | |
1752 | return 0; |
1753 | } |
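
/*
 * Worked example for the BAR sizing above (assuming the standard PCI rebar
 * encoding, where pci_rebar_bytes_to_size() returns log2(bytes / 1 MiB)):
 * an 8 GiB framebuffer is 8192 MiB = 2^13 MiB, so rbar_size starts at 13 and
 * pci_resize_resource() requests an 8 GiB BAR0, clamped beforehand to the
 * sizes reported by pci_rebar_get_possible_sizes().
 */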
1754 | |
1755 | /* |
1756 | * GPU helpers function. |
1757 | */ |
1758 | /** |
1759 | * amdgpu_device_need_post - check if the hw need post or not |
1760 | * |
1761 | * @adev: amdgpu_device pointer |
1762 | * |
 * Check if the asic has been initialized (all asics) at driver startup,
 * or if post is needed because a hw reset was performed.
 * Returns true if post is needed, false if not.
1766 | */ |
1767 | bool amdgpu_device_need_post(struct amdgpu_device *adev) |
1768 | { |
1769 | uint32_t reg, flags; |
1770 | |
1771 | if (amdgpu_sriov_vf(adev)) |
1772 | return false; |
1773 | |
1774 | flags = amdgpu_device_get_vbios_flags(adev); |
1775 | if (flags & AMDGPU_VBIOS_SKIP) |
1776 | return false; |
1777 | if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios) |
1778 | return false; |
1779 | |
1780 | if (amdgpu_passthrough(adev)) { |
		/* for FIJI: In the whole-GPU pass-through virtualization case, after a VM
		 * reboot some old SMC firmware still needs the driver to do a vPost or the
		 * GPU hangs. SMC firmware versions 22.15 and above don't have this flaw,
		 * so force vPost for SMC versions below 22.15.
		 */
1786 | if (adev->asic_type == CHIP_FIJI) { |
1787 | int err; |
1788 | uint32_t fw_ver; |
1789 | |
			err = request_firmware(&adev->pm.fw, "amdgpu/fiji_smc.bin", adev->dev);
1791 | /* force vPost if error occurred */ |
1792 | if (err) |
1793 | return true; |
1794 | |
1795 | fw_ver = *((uint32_t *)adev->pm.fw->data + 69); |
			release_firmware(adev->pm.fw);
1797 | if (fw_ver < 0x00160e00) |
1798 | return true; |
1799 | } |
1800 | } |
1801 | |
1802 | /* Don't post if we need to reset whole hive on init */ |
1803 | if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI) |
1804 | return false; |
1805 | |
1806 | if (adev->has_hw_reset) { |
1807 | adev->has_hw_reset = false; |
1808 | return true; |
1809 | } |
1810 | |
1811 | /* bios scratch used on CIK+ */ |
1812 | if (adev->asic_type >= CHIP_BONAIRE) |
1813 | return amdgpu_atombios_scratch_need_asic_init(adev); |
1814 | |
1815 | /* check MEM_SIZE for older asics */ |
1816 | reg = amdgpu_asic_get_config_memsize(adev); |
1817 | |
1818 | if ((reg != 0) && (reg != 0xffffffff)) |
1819 | return false; |
1820 | |
1821 | return true; |
1822 | } |
1823 | |
1824 | /* |
1825 | * Check whether seamless boot is supported. |
1826 | * |
1827 | * So far we only support seamless boot on DCE 3.0 or later. |
1828 | * If users report that it works on older ASICS as well, we may |
1829 | * loosen this. |
1830 | */ |
1831 | bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev) |
1832 | { |
1833 | switch (amdgpu_seamless) { |
1834 | case -1: |
1835 | break; |
1836 | case 1: |
1837 | return true; |
1838 | case 0: |
1839 | return false; |
1840 | default: |
1841 | DRM_ERROR("Invalid value for amdgpu.seamless: %d\n", |
1842 | amdgpu_seamless); |
1843 | return false; |
1844 | } |
1845 | |
1846 | if (!(adev->flags & AMD_IS_APU)) |
1847 | return false; |
1848 | |
1849 | if (adev->mman.keep_stolen_vga_memory) |
1850 | return false; |
1851 | |
	return amdgpu_ip_version(adev, DCE_HWIP, 0) >= IP_VERSION(3, 0, 0);
1853 | } |
1854 | |
1855 | /* |
1856 | * Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids |
1857 | * don't support dynamic speed switching. Until we have confirmation from Intel |
1858 | * that a specific host supports it, it's safer that we keep it disabled for all. |
1859 | * |
1860 | * https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/ |
1861 | * https://gitlab.freedesktop.org/drm/amd/-/issues/2663 |
1862 | */ |
1863 | static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev) |
1864 | { |
1865 | #if IS_ENABLED(CONFIG_X86) |
1866 | struct cpuinfo_x86 *c = &cpu_data(0); |
1867 | |
1868 | /* eGPU change speeds based on USB4 fabric conditions */ |
	if (dev_is_removable(adev->dev))
1870 | return true; |
1871 | |
1872 | if (c->x86_vendor == X86_VENDOR_INTEL) |
1873 | return false; |
1874 | #endif |
1875 | return true; |
1876 | } |
1877 | |
1878 | static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev) |
1879 | { |
1880 | #if IS_ENABLED(CONFIG_X86) |
1881 | struct cpuinfo_x86 *c = &cpu_data(0); |
1882 | |
	if (!(amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 0) ||
	      amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(12, 0, 1)))
1885 | return false; |
1886 | |
1887 | if (c->x86 == 6 && |
1888 | adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) { |
1889 | switch (c->x86_model) { |
1890 | case VFM_MODEL(INTEL_ALDERLAKE): |
1891 | case VFM_MODEL(INTEL_ALDERLAKE_L): |
1892 | case VFM_MODEL(INTEL_RAPTORLAKE): |
1893 | case VFM_MODEL(INTEL_RAPTORLAKE_P): |
1894 | case VFM_MODEL(INTEL_RAPTORLAKE_S): |
1895 | return true; |
1896 | default: |
1897 | return false; |
1898 | } |
1899 | } else { |
1900 | return false; |
1901 | } |
1902 | #else |
1903 | return false; |
1904 | #endif |
1905 | } |
1906 | |
1907 | /** |
1908 | * amdgpu_device_should_use_aspm - check if the device should program ASPM |
1909 | * |
1910 | * @adev: amdgpu_device pointer |
1911 | * |
1912 | * Confirm whether the module parameter and pcie bridge agree that ASPM should |
1913 | * be set for this device. |
1914 | * |
1915 | * Returns true if it should be used or false if not. |
1916 | */ |
1917 | bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev) |
1918 | { |
1919 | switch (amdgpu_aspm) { |
1920 | case -1: |
1921 | break; |
1922 | case 0: |
1923 | return false; |
1924 | case 1: |
1925 | return true; |
1926 | default: |
1927 | return false; |
1928 | } |
1929 | if (adev->flags & AMD_IS_APU) |
1930 | return false; |
1931 | if (amdgpu_device_aspm_support_quirk(adev)) |
1932 | return false; |
	return pcie_aspm_enabled(adev->pdev);
1934 | } |
1935 | |
1936 | /* if we get transitioned to only one device, take VGA back */ |
1937 | /** |
1938 | * amdgpu_device_vga_set_decode - enable/disable vga decode |
1939 | * |
1940 | * @pdev: PCI device pointer |
1941 | * @state: enable/disable vga decode |
1942 | * |
1943 | * Enable/disable vga decode (all asics). |
1944 | * Returns VGA resource flags. |
1945 | */ |
1946 | static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev, |
1947 | bool state) |
1948 | { |
	struct amdgpu_device *adev = drm_to_adev(pci_get_drvdata(pdev));
1950 | |
1951 | amdgpu_asic_set_vga_state(adev, state); |
1952 | if (state) |
1953 | return VGA_RSRC_LEGACY_IO | VGA_RSRC_LEGACY_MEM | |
1954 | VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; |
1955 | else |
1956 | return VGA_RSRC_NORMAL_IO | VGA_RSRC_NORMAL_MEM; |
1957 | } |
1958 | |
1959 | /** |
1960 | * amdgpu_device_check_block_size - validate the vm block size |
1961 | * |
1962 | * @adev: amdgpu_device pointer |
1963 | * |
1964 | * Validates the vm block size specified via module parameter. |
1965 | * The vm block size defines number of bits in page table versus page directory, |
1966 | * a page is 4KB so we have 12 bits offset, minimum 9 bits in the |
1967 | * page table and the remaining bits are in the page directory. |
1968 | */ |
1969 | static void amdgpu_device_check_block_size(struct amdgpu_device *adev) |
1970 | { |
1971 | /* defines number of bits in page table versus page directory, |
1972 | * a page is 4KB so we have 12 bits offset, minimum 9 bits in the |
1973 | * page table and the remaining bits are in the page directory |
1974 | */ |
1975 | if (amdgpu_vm_block_size == -1) |
1976 | return; |
1977 | |
1978 | if (amdgpu_vm_block_size < 9) { |
1979 | dev_warn(adev->dev, "VM page table size (%d) too small\n", |
1980 | amdgpu_vm_block_size); |
1981 | amdgpu_vm_block_size = -1; |
1982 | } |
1983 | } |
1984 | |
1985 | /** |
1986 | * amdgpu_device_check_vm_size - validate the vm size |
1987 | * |
1988 | * @adev: amdgpu_device pointer |
1989 | * |
1990 | * Validates the vm size in GB specified via module parameter. |
1991 | * The VM size is the size of the GPU virtual memory space in GB. |
1992 | */ |
1993 | static void amdgpu_device_check_vm_size(struct amdgpu_device *adev) |
1994 | { |
1995 | /* no need to check the default value */ |
1996 | if (amdgpu_vm_size == -1) |
1997 | return; |
1998 | |
1999 | if (amdgpu_vm_size < 1) { |
2000 | dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n", |
2001 | amdgpu_vm_size); |
2002 | amdgpu_vm_size = -1; |
2003 | } |
2004 | } |
2005 | |
2006 | static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev) |
2007 | { |
2008 | struct sysinfo si; |
2009 | bool is_os_64 = (sizeof(void *) == 8); |
2010 | uint64_t total_memory; |
2011 | uint64_t dram_size_seven_GB = 0x1B8000000; |
2012 | uint64_t dram_size_three_GB = 0xB8000000; |
2013 | |
2014 | if (amdgpu_smu_memory_pool_size == 0) |
2015 | return; |
2016 | |
2017 | if (!is_os_64) { |
2018 | DRM_WARN("Not 64-bit OS, feature not supported\n"); |
2019 | goto def_value; |
2020 | } |
	si_meminfo(&si);
2022 | total_memory = (uint64_t)si.totalram * si.mem_unit; |
2023 | |
2024 | if ((amdgpu_smu_memory_pool_size == 1) || |
2025 | (amdgpu_smu_memory_pool_size == 2)) { |
2026 | if (total_memory < dram_size_three_GB) |
2027 | goto def_value1; |
2028 | } else if ((amdgpu_smu_memory_pool_size == 4) || |
2029 | (amdgpu_smu_memory_pool_size == 8)) { |
2030 | if (total_memory < dram_size_seven_GB) |
2031 | goto def_value1; |
2032 | } else { |
2033 | DRM_WARN("Smu memory pool size not supported\n"); |
2034 | goto def_value; |
2035 | } |
2036 | adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << 28; |
2037 | |
2038 | return; |
2039 | |
2040 | def_value1: |
	DRM_WARN("Not enough system memory\n");
2042 | def_value: |
2043 | adev->pm.smu_prv_buffer_size = 0; |
2044 | } |
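
/*
 * Worked example for the pool sizing above: the module parameter is in units
 * of 256 MiB (value << 28 bytes), so amdgpu.smu_memory_pool_size=1 reserves
 * 256 MiB and needs roughly 3 GiB of system RAM, while a value of 4 reserves
 * 1 GiB and needs roughly 7 GiB.
 */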
2045 | |
2046 | static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev) |
2047 | { |
2048 | if (!(adev->flags & AMD_IS_APU) || |
2049 | adev->asic_type < CHIP_RAVEN) |
2050 | return 0; |
2051 | |
2052 | switch (adev->asic_type) { |
2053 | case CHIP_RAVEN: |
2054 | if (adev->pdev->device == 0x15dd) |
2055 | adev->apu_flags |= AMD_APU_IS_RAVEN; |
2056 | if (adev->pdev->device == 0x15d8) |
2057 | adev->apu_flags |= AMD_APU_IS_PICASSO; |
2058 | break; |
2059 | case CHIP_RENOIR: |
2060 | if ((adev->pdev->device == 0x1636) || |
2061 | (adev->pdev->device == 0x164c)) |
2062 | adev->apu_flags |= AMD_APU_IS_RENOIR; |
2063 | else |
2064 | adev->apu_flags |= AMD_APU_IS_GREEN_SARDINE; |
2065 | break; |
2066 | case CHIP_VANGOGH: |
2067 | adev->apu_flags |= AMD_APU_IS_VANGOGH; |
2068 | break; |
2069 | case CHIP_YELLOW_CARP: |
2070 | break; |
2071 | case CHIP_CYAN_SKILLFISH: |
2072 | if ((adev->pdev->device == 0x13FE) || |
2073 | (adev->pdev->device == 0x143F)) |
2074 | adev->apu_flags |= AMD_APU_IS_CYAN_SKILLFISH2; |
2075 | break; |
2076 | default: |
2077 | break; |
2078 | } |
2079 | |
2080 | return 0; |
2081 | } |
2082 | |
2083 | /** |
2084 | * amdgpu_device_check_arguments - validate module params |
2085 | * |
2086 | * @adev: amdgpu_device pointer |
2087 | * |
2088 | * Validates certain module parameters and updates |
2089 | * the associated values used by the driver (all asics). |
2090 | */ |
2091 | static int amdgpu_device_check_arguments(struct amdgpu_device *adev) |
2092 | { |
2093 | int i; |
2094 | |
2095 | if (amdgpu_sched_jobs < 4) { |
2096 | dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n", |
2097 | amdgpu_sched_jobs); |
2098 | amdgpu_sched_jobs = 4; |
	} else if (!is_power_of_2(amdgpu_sched_jobs)) {
2100 | dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n", |
2101 | amdgpu_sched_jobs); |
2102 | amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs); |
2103 | } |
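	/* e.g. amdgpu.sched_jobs=6 is not a power of two and is rounded up to 8 above */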
2104 | |
2105 | if (amdgpu_gart_size != -1 && amdgpu_gart_size < 32) { |
2106 | /* gart size must be greater or equal to 32M */ |
2107 | dev_warn(adev->dev, "gart size (%d) too small\n", |
2108 | amdgpu_gart_size); |
2109 | amdgpu_gart_size = -1; |
2110 | } |
2111 | |
2112 | if (amdgpu_gtt_size != -1 && amdgpu_gtt_size < 32) { |
2113 | /* gtt size must be greater or equal to 32M */ |
2114 | dev_warn(adev->dev, "gtt size (%d) too small\n", |
2115 | amdgpu_gtt_size); |
2116 | amdgpu_gtt_size = -1; |
2117 | } |
2118 | |
2119 | /* valid range is between 4 and 9 inclusive */ |
2120 | if (amdgpu_vm_fragment_size != -1 && |
2121 | (amdgpu_vm_fragment_size > 9 || amdgpu_vm_fragment_size < 4)) { |
2122 | dev_warn(adev->dev, "valid range is between 4 and 9\n"); |
2123 | amdgpu_vm_fragment_size = -1; |
2124 | } |
2125 | |
2126 | if (amdgpu_sched_hw_submission < 2) { |
2127 | dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n", |
2128 | amdgpu_sched_hw_submission); |
2129 | amdgpu_sched_hw_submission = 2; |
	} else if (!is_power_of_2(amdgpu_sched_hw_submission)) {
2131 | dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n", |
2132 | amdgpu_sched_hw_submission); |
2133 | amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission); |
2134 | } |
2135 | |
2136 | if (amdgpu_reset_method < -1 || amdgpu_reset_method > 4) { |
2137 | dev_warn(adev->dev, "invalid option for reset method, reverting to default\n"); |
2138 | amdgpu_reset_method = -1; |
2139 | } |
2140 | |
2141 | amdgpu_device_check_smu_prv_buffer_size(adev); |
2142 | |
2143 | amdgpu_device_check_vm_size(adev); |
2144 | |
2145 | amdgpu_device_check_block_size(adev); |
2146 | |
	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
2148 | |
2149 | for (i = 0; i < MAX_XCP; i++) { |
2150 | switch (amdgpu_enforce_isolation) { |
2151 | case -1: |
2152 | case 0: |
2153 | default: |
2154 | /* disable */ |
2155 | adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE; |
2156 | break; |
2157 | case 1: |
2158 | /* enable */ |
2159 | adev->enforce_isolation[i] = |
2160 | AMDGPU_ENFORCE_ISOLATION_ENABLE; |
2161 | break; |
2162 | case 2: |
2163 | /* enable legacy mode */ |
2164 | adev->enforce_isolation[i] = |
2165 | AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY; |
2166 | break; |
2167 | case 3: |
2168 | /* enable only process isolation without submitting cleaner shader */ |
2169 | adev->enforce_isolation[i] = |
2170 | AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER; |
2171 | break; |
2172 | } |
2173 | } |
2174 | |
2175 | return 0; |
2176 | } |
2177 | |
2178 | /** |
2179 | * amdgpu_switcheroo_set_state - set switcheroo state |
2180 | * |
2181 | * @pdev: pci dev pointer |
2182 | * @state: vga_switcheroo state |
2183 | * |
2184 | * Callback for the switcheroo driver. Suspends or resumes |
2185 | * the asics before or after it is powered up using ACPI methods. |
2186 | */ |
2187 | static void amdgpu_switcheroo_set_state(struct pci_dev *pdev, |
2188 | enum vga_switcheroo_state state) |
2189 | { |
2190 | struct drm_device *dev = pci_get_drvdata(pdev); |
2191 | int r; |
2192 | |
2193 | if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF) |
2194 | return; |
2195 | |
2196 | if (state == VGA_SWITCHEROO_ON) { |
2197 | pr_info("switched on\n"); |
2198 | /* don't suspend or resume card normally */ |
2199 | dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; |
2200 | |
		pci_set_power_state(pdev, PCI_D0);
		amdgpu_device_load_pci_state(pdev);
		r = pci_enable_device(pdev);
2204 | if (r) |
2205 | DRM_WARN("pci_enable_device failed (%d)\n", r); |
		amdgpu_device_resume(dev, true);
2207 | |
2208 | dev->switch_power_state = DRM_SWITCH_POWER_ON; |
2209 | } else { |
2210 | pr_info("switched off\n"); |
2211 | dev->switch_power_state = DRM_SWITCH_POWER_CHANGING; |
2212 | amdgpu_device_prepare(dev); |
		amdgpu_device_suspend(dev, true);
2214 | amdgpu_device_cache_pci_state(pdev); |
2215 | /* Shut down the device */ |
		pci_disable_device(pdev);
		pci_set_power_state(pdev, PCI_D3cold);
2218 | dev->switch_power_state = DRM_SWITCH_POWER_OFF; |
2219 | } |
2220 | } |
2221 | |
2222 | /** |
2223 | * amdgpu_switcheroo_can_switch - see if switcheroo state can change |
2224 | * |
2225 | * @pdev: pci dev pointer |
2226 | * |
 * Callback for the switcheroo driver. Checks whether the switcheroo
 * state can be changed.
2229 | * Returns true if the state can be changed, false if not. |
2230 | */ |
2231 | static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev) |
2232 | { |
2233 | struct drm_device *dev = pci_get_drvdata(pdev); |
2234 | |
2235 | /* |
2236 | * FIXME: open_count is protected by drm_global_mutex but that would lead to |
2237 | * locking inversion with the driver load path. And the access here is |
2238 | * completely racy anyway. So don't bother with locking for now. |
2239 | */ |
	return atomic_read(&dev->open_count) == 0;
2241 | } |
2242 | |
2243 | static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = { |
2244 | .set_gpu_state = amdgpu_switcheroo_set_state, |
2245 | .reprobe = NULL, |
2246 | .can_switch = amdgpu_switcheroo_can_switch, |
2247 | }; |
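
/*
 * Sketch of how these ops are typically registered with vga_switcheroo
 * (hedged; the actual registration happens elsewhere in the driver):
 *
 *	bool px = amdgpu_device_supports_px(adev_to_drm(adev));
 *
 *	vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, px);
 *	...
 *	vga_switcheroo_unregister_client(adev->pdev);
 */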
2248 | |
2249 | /** |
2250 | * amdgpu_device_ip_set_clockgating_state - set the CG state |
2251 | * |
2252 | * @dev: amdgpu_device pointer |
2253 | * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) |
2254 | * @state: clockgating state (gate or ungate) |
2255 | * |
2256 | * Sets the requested clockgating state for all instances of |
2257 | * the hardware IP specified. |
2258 | * Returns the error code from the last instance. |
2259 | */ |
2260 | int amdgpu_device_ip_set_clockgating_state(void *dev, |
2261 | enum amd_ip_block_type block_type, |
2262 | enum amd_clockgating_state state) |
2263 | { |
2264 | struct amdgpu_device *adev = dev; |
2265 | int i, r = 0; |
2266 | |
2267 | for (i = 0; i < adev->num_ip_blocks; i++) { |
2268 | if (!adev->ip_blocks[i].status.valid) |
2269 | continue; |
2270 | if (adev->ip_blocks[i].version->type != block_type) |
2271 | continue; |
2272 | if (!adev->ip_blocks[i].version->funcs->set_clockgating_state) |
2273 | continue; |
2274 | r = adev->ip_blocks[i].version->funcs->set_clockgating_state( |
2275 | &adev->ip_blocks[i], state); |
2276 | if (r) |
2277 | DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n", |
2278 | adev->ip_blocks[i].version->funcs->name, r); |
2279 | } |
2280 | return r; |
2281 | } |
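
/*
 * Usage sketch (hypothetical call, not taken from this file): callers pass the
 * amdgpu_device as the opaque @dev handle, e.g. to gate clocks for all GFX
 * instances:
 *
 *	r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_GFX,
 *						   AMD_CG_STATE_GATE);
 *
 * Note that only the error from the last matching instance is returned.
 */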
2282 | |
2283 | /** |
2284 | * amdgpu_device_ip_set_powergating_state - set the PG state |
2285 | * |
2286 | * @dev: amdgpu_device pointer |
2287 | * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) |
2288 | * @state: powergating state (gate or ungate) |
2289 | * |
2290 | * Sets the requested powergating state for all instances of |
2291 | * the hardware IP specified. |
2292 | * Returns the error code from the last instance. |
2293 | */ |
2294 | int amdgpu_device_ip_set_powergating_state(void *dev, |
2295 | enum amd_ip_block_type block_type, |
2296 | enum amd_powergating_state state) |
2297 | { |
2298 | struct amdgpu_device *adev = dev; |
2299 | int i, r = 0; |
2300 | |
2301 | for (i = 0; i < adev->num_ip_blocks; i++) { |
2302 | if (!adev->ip_blocks[i].status.valid) |
2303 | continue; |
2304 | if (adev->ip_blocks[i].version->type != block_type) |
2305 | continue; |
2306 | if (!adev->ip_blocks[i].version->funcs->set_powergating_state) |
2307 | continue; |
2308 | r = adev->ip_blocks[i].version->funcs->set_powergating_state( |
2309 | &adev->ip_blocks[i], state); |
2310 | if (r) |
2311 | DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n", |
2312 | adev->ip_blocks[i].version->funcs->name, r); |
2313 | } |
2314 | return r; |
2315 | } |
2316 | |
2317 | /** |
2318 | * amdgpu_device_ip_get_clockgating_state - get the CG state |
2319 | * |
2320 | * @adev: amdgpu_device pointer |
2321 | * @flags: clockgating feature flags |
2322 | * |
2323 | * Walks the list of IPs on the device and updates the clockgating |
2324 | * flags for each IP. |
2325 | * Updates @flags with the feature flags for each hardware IP where |
2326 | * clockgating is enabled. |
2327 | */ |
2328 | void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev, |
2329 | u64 *flags) |
2330 | { |
2331 | int i; |
2332 | |
2333 | for (i = 0; i < adev->num_ip_blocks; i++) { |
2334 | if (!adev->ip_blocks[i].status.valid) |
2335 | continue; |
2336 | if (adev->ip_blocks[i].version->funcs->get_clockgating_state) |
2337 | adev->ip_blocks[i].version->funcs->get_clockgating_state( |
2338 | &adev->ip_blocks[i], flags); |
2339 | } |
2340 | } |
2341 | |
2342 | /** |
2343 | * amdgpu_device_ip_wait_for_idle - wait for idle |
2344 | * |
2345 | * @adev: amdgpu_device pointer |
2346 | * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) |
2347 | * |
 * Waits for the requested hardware IP to be idle.
2349 | * Returns 0 for success or a negative error code on failure. |
2350 | */ |
2351 | int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev, |
2352 | enum amd_ip_block_type block_type) |
2353 | { |
2354 | int i, r; |
2355 | |
2356 | for (i = 0; i < adev->num_ip_blocks; i++) { |
2357 | if (!adev->ip_blocks[i].status.valid) |
2358 | continue; |
2359 | if (adev->ip_blocks[i].version->type == block_type) { |
2360 | if (adev->ip_blocks[i].version->funcs->wait_for_idle) { |
2361 | r = adev->ip_blocks[i].version->funcs->wait_for_idle( |
2362 | &adev->ip_blocks[i]); |
2363 | if (r) |
2364 | return r; |
2365 | } |
2366 | break; |
2367 | } |
2368 | } |
2369 | return 0; |
2370 | |
2371 | } |
2372 | |
2373 | /** |
2374 | * amdgpu_device_ip_is_valid - is the hardware IP enabled |
2375 | * |
2376 | * @adev: amdgpu_device pointer |
2377 | * @block_type: Type of hardware IP (SMU, GFX, UVD, etc.) |
2378 | * |
 * Check if the hardware IP is enabled or not.
 * Returns true if the IP is enabled, false if not.
2381 | */ |
2382 | bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev, |
2383 | enum amd_ip_block_type block_type) |
2384 | { |
2385 | int i; |
2386 | |
2387 | for (i = 0; i < adev->num_ip_blocks; i++) { |
2388 | if (adev->ip_blocks[i].version->type == block_type) |
2389 | return adev->ip_blocks[i].status.valid; |
2390 | } |
2391 | return false; |
2392 | |
2393 | } |
2394 | |
2395 | /** |
2396 | * amdgpu_device_ip_get_ip_block - get a hw IP pointer |
2397 | * |
2398 | * @adev: amdgpu_device pointer |
2399 | * @type: Type of hardware IP (SMU, GFX, UVD, etc.) |
2400 | * |
2401 | * Returns a pointer to the hardware IP block structure |
2402 | * if it exists for the asic, otherwise NULL. |
2403 | */ |
2404 | struct amdgpu_ip_block * |
2405 | amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, |
2406 | enum amd_ip_block_type type) |
2407 | { |
2408 | int i; |
2409 | |
2410 | for (i = 0; i < adev->num_ip_blocks; i++) |
2411 | if (adev->ip_blocks[i].version->type == type) |
2412 | return &adev->ip_blocks[i]; |
2413 | |
2414 | return NULL; |
2415 | } |
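
/*
 * Usage sketch (illustrative): the returned pointer may be NULL and
 * status.valid should still be checked, mirroring the lookup done later in
 * this file for the GFX block:
 *
 *	struct amdgpu_ip_block *ip_block =
 *		amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
 *
 *	if (ip_block && ip_block->status.valid)
 *		;	// the GFX IP is present and enabled on this asic
 */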
2416 | |
2417 | /** |
2418 | * amdgpu_device_ip_block_version_cmp |
2419 | * |
2420 | * @adev: amdgpu_device pointer |
2421 | * @type: enum amd_ip_block_type |
2422 | * @major: major version |
2423 | * @minor: minor version |
2424 | * |
2425 | * return 0 if equal or greater |
2426 | * return 1 if smaller or the ip_block doesn't exist |
2427 | */ |
2428 | int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev, |
2429 | enum amd_ip_block_type type, |
2430 | u32 major, u32 minor) |
2431 | { |
2432 | struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type); |
2433 | |
2434 | if (ip_block && ((ip_block->version->major > major) || |
2435 | ((ip_block->version->major == major) && |
2436 | (ip_block->version->minor >= minor)))) |
2437 | return 0; |
2438 | |
2439 | return 1; |
2440 | } |
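
/*
 * Example of the comparison semantics above: a return value of 0 means "this
 * version or newer is present", so a caller gating behavior on SMC 7.1+
 * might do (hypothetical flag name):
 *
 *	if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_SMC, 7, 1))
 *		use_new_smc_feature = true;
 */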
2441 | |
2442 | /** |
2443 | * amdgpu_device_ip_block_add |
2444 | * |
2445 | * @adev: amdgpu_device pointer |
2446 | * @ip_block_version: pointer to the IP to add |
2447 | * |
2448 | * Adds the IP block driver information to the collection of IPs |
2449 | * on the asic. |
2450 | */ |
2451 | int amdgpu_device_ip_block_add(struct amdgpu_device *adev, |
2452 | const struct amdgpu_ip_block_version *ip_block_version) |
2453 | { |
2454 | if (!ip_block_version) |
2455 | return -EINVAL; |
2456 | |
2457 | switch (ip_block_version->type) { |
2458 | case AMD_IP_BLOCK_TYPE_VCN: |
2459 | if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK) |
2460 | return 0; |
2461 | break; |
2462 | case AMD_IP_BLOCK_TYPE_JPEG: |
2463 | if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK) |
2464 | return 0; |
2465 | break; |
2466 | default: |
2467 | break; |
2468 | } |
2469 | |
2470 | dev_info(adev->dev, "detected ip block number %d <%s>\n", |
2471 | adev->num_ip_blocks, ip_block_version->funcs->name); |
2472 | |
2473 | adev->ip_blocks[adev->num_ip_blocks].adev = adev; |
2474 | |
2475 | adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version; |
2476 | |
2477 | return 0; |
2478 | } |
2479 | |
2480 | /** |
2481 | * amdgpu_device_enable_virtual_display - enable virtual display feature |
2482 | * |
2483 | * @adev: amdgpu_device pointer |
2484 | * |
 * Enables the virtual display feature if the user has enabled it via
2486 | * the module parameter virtual_display. This feature provides a virtual |
2487 | * display hardware on headless boards or in virtualized environments. |
2488 | * This function parses and validates the configuration string specified by |
2489 | * the user and configures the virtual display configuration (number of |
2490 | * virtual connectors, crtcs, etc.) specified. |
2491 | */ |
2492 | static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev) |
2493 | { |
2494 | adev->enable_virtual_display = false; |
2495 | |
2496 | if (amdgpu_virtual_display) { |
		const char *pci_address_name = pci_name(adev->pdev);
2498 | char *pciaddstr, *pciaddstr_tmp, *pciaddname_tmp, *pciaddname; |
2499 | |
		pciaddstr = kstrdup(amdgpu_virtual_display, GFP_KERNEL);
2501 | pciaddstr_tmp = pciaddstr; |
2502 | while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) { |
2503 | pciaddname = strsep(&pciaddname_tmp, ","); |
2504 | if (!strcmp("all", pciaddname) |
2505 | || !strcmp(pci_address_name, pciaddname)) { |
2506 | long num_crtc; |
2507 | int res = -1; |
2508 | |
2509 | adev->enable_virtual_display = true; |
2510 | |
2511 | if (pciaddname_tmp) |
					res = kstrtol(pciaddname_tmp, 10,
						      &num_crtc);
2514 | |
2515 | if (!res) { |
2516 | if (num_crtc < 1) |
2517 | num_crtc = 1; |
2518 | if (num_crtc > 6) |
2519 | num_crtc = 6; |
2520 | adev->mode_info.num_crtc = num_crtc; |
2521 | } else { |
2522 | adev->mode_info.num_crtc = 1; |
2523 | } |
2524 | break; |
2525 | } |
2526 | } |
2527 | |
2528 | DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n", |
2529 | amdgpu_virtual_display, pci_address_name, |
2530 | adev->enable_virtual_display, adev->mode_info.num_crtc); |
2531 | |
		kfree(pciaddstr);
2533 | } |
2534 | } |
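
/*
 * The parameter parsed above is a semicolon-separated list of
 * "<pci address>[,<crtc count>]" entries, e.g. (hypothetical addresses):
 *
 *	amdgpu.virtual_display=0000:04:00.0,2;0000:05:00.0
 *
 * or "all,1" to enable a single virtual crtc on every amdgpu device.
 */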
2535 | |
2536 | void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev) |
2537 | { |
2538 | if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) { |
2539 | adev->mode_info.num_crtc = 1; |
2540 | adev->enable_virtual_display = true; |
2541 | DRM_INFO("virtual_display:%d, num_crtc:%d\n", |
2542 | adev->enable_virtual_display, adev->mode_info.num_crtc); |
2543 | } |
2544 | } |
2545 | |
2546 | /** |
2547 | * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware |
2548 | * |
2549 | * @adev: amdgpu_device pointer |
2550 | * |
2551 | * Parses the asic configuration parameters specified in the gpu info |
2552 | * firmware and makes them available to the driver for use in configuring |
2553 | * the asic. |
2554 | * Returns 0 on success, -EINVAL on failure. |
2555 | */ |
2556 | static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) |
2557 | { |
2558 | const char *chip_name; |
2559 | int err; |
2560 | const struct gpu_info_firmware_header_v1_0 *hdr; |
2561 | |
2562 | adev->firmware.gpu_info_fw = NULL; |
2563 | |
2564 | if (adev->mman.discovery_bin) |
2565 | return 0; |
2566 | |
2567 | switch (adev->asic_type) { |
2568 | default: |
2569 | return 0; |
2570 | case CHIP_VEGA10: |
2571 | chip_name = "vega10"; |
2572 | break; |
2573 | case CHIP_VEGA12: |
2574 | chip_name = "vega12"; |
2575 | break; |
2576 | case CHIP_RAVEN: |
2577 | if (adev->apu_flags & AMD_APU_IS_RAVEN2) |
2578 | chip_name = "raven2"; |
2579 | else if (adev->apu_flags & AMD_APU_IS_PICASSO) |
2580 | chip_name = "picasso"; |
2581 | else |
2582 | chip_name = "raven"; |
2583 | break; |
2584 | case CHIP_ARCTURUS: |
2585 | chip_name = "arcturus"; |
2586 | break; |
2587 | case CHIP_NAVI12: |
2588 | chip_name = "navi12"; |
2589 | break; |
2590 | } |
2591 | |
	err = amdgpu_ucode_request(adev, &adev->firmware.gpu_info_fw,
				   AMDGPU_UCODE_OPTIONAL,
				   "amdgpu/%s_gpu_info.bin", chip_name);
2595 | if (err) { |
2596 | dev_err(adev->dev, |
2597 | "Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n", |
2598 | chip_name); |
2599 | goto out; |
2600 | } |
2601 | |
2602 | hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data; |
	amdgpu_ucode_print_gpu_info_hdr(&hdr->header);
2604 | |
2605 | switch (hdr->version_major) { |
2606 | case 1: |
2607 | { |
2608 | const struct gpu_info_firmware_v1_0 *gpu_info_fw = |
2609 | (const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data + |
2610 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
2611 | |
2612 | /* |
2613 | * Should be dropped when DAL no longer needs it. |
2614 | */ |
2615 | if (adev->asic_type == CHIP_NAVI12) |
2616 | goto parse_soc_bounding_box; |
2617 | |
2618 | adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se); |
2619 | adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh); |
2620 | adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se); |
2621 | adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se); |
2622 | adev->gfx.config.max_texture_channel_caches = |
2623 | le32_to_cpu(gpu_info_fw->gc_num_tccs); |
2624 | adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs); |
2625 | adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds); |
2626 | adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth); |
2627 | adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth); |
2628 | adev->gfx.config.double_offchip_lds_buf = |
2629 | le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer); |
2630 | adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size); |
2631 | adev->gfx.cu_info.max_waves_per_simd = |
2632 | le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd); |
2633 | adev->gfx.cu_info.max_scratch_slots_per_cu = |
2634 | le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu); |
2635 | adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size); |
2636 | if (hdr->version_minor >= 1) { |
2637 | const struct gpu_info_firmware_v1_1 *gpu_info_fw = |
2638 | (const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data + |
2639 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
2640 | adev->gfx.config.num_sc_per_sh = |
2641 | le32_to_cpu(gpu_info_fw->num_sc_per_sh); |
2642 | adev->gfx.config.num_packer_per_sc = |
2643 | le32_to_cpu(gpu_info_fw->num_packer_per_sc); |
2644 | } |
2645 | |
2646 | parse_soc_bounding_box: |
2647 | /* |
		 * soc bounding box info is not integrated in the discovery table,
2649 | * we always need to parse it from gpu info firmware if needed. |
2650 | */ |
2651 | if (hdr->version_minor == 2) { |
2652 | const struct gpu_info_firmware_v1_2 *gpu_info_fw = |
2653 | (const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data + |
2654 | le32_to_cpu(hdr->header.ucode_array_offset_bytes)); |
2655 | adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box; |
2656 | } |
2657 | break; |
2658 | } |
2659 | default: |
2660 | dev_err(adev->dev, |
2661 | "Unsupported gpu_info table %d\n", hdr->header.ucode_version); |
2662 | err = -EINVAL; |
2663 | goto out; |
2664 | } |
2665 | out: |
2666 | return err; |
2667 | } |
2668 | |
2669 | /** |
2670 | * amdgpu_device_ip_early_init - run early init for hardware IPs |
2671 | * |
2672 | * @adev: amdgpu_device pointer |
2673 | * |
2674 | * Early initialization pass for hardware IPs. The hardware IPs that make |
 * up each asic are discovered and each IP's early_init callback is run. This
2676 | * is the first stage in initializing the asic. |
2677 | * Returns 0 on success, negative error code on failure. |
2678 | */ |
2679 | static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) |
2680 | { |
2681 | struct amdgpu_ip_block *ip_block; |
2682 | struct pci_dev *parent; |
2683 | bool total, skip_bios; |
2684 | uint32_t bios_flags; |
2685 | int i, r; |
2686 | |
2687 | amdgpu_device_enable_virtual_display(adev); |
2688 | |
2689 | if (amdgpu_sriov_vf(adev)) { |
		r = amdgpu_virt_request_full_gpu(adev, true);
2691 | if (r) |
2692 | return r; |
2693 | } |
2694 | |
2695 | switch (adev->asic_type) { |
2696 | #ifdef CONFIG_DRM_AMDGPU_SI |
2697 | case CHIP_VERDE: |
2698 | case CHIP_TAHITI: |
2699 | case CHIP_PITCAIRN: |
2700 | case CHIP_OLAND: |
2701 | case CHIP_HAINAN: |
2702 | adev->family = AMDGPU_FAMILY_SI; |
2703 | r = si_set_ip_blocks(adev); |
2704 | if (r) |
2705 | return r; |
2706 | break; |
2707 | #endif |
2708 | #ifdef CONFIG_DRM_AMDGPU_CIK |
2709 | case CHIP_BONAIRE: |
2710 | case CHIP_HAWAII: |
2711 | case CHIP_KAVERI: |
2712 | case CHIP_KABINI: |
2713 | case CHIP_MULLINS: |
2714 | if (adev->flags & AMD_IS_APU) |
2715 | adev->family = AMDGPU_FAMILY_KV; |
2716 | else |
2717 | adev->family = AMDGPU_FAMILY_CI; |
2718 | |
2719 | r = cik_set_ip_blocks(adev); |
2720 | if (r) |
2721 | return r; |
2722 | break; |
2723 | #endif |
2724 | case CHIP_TOPAZ: |
2725 | case CHIP_TONGA: |
2726 | case CHIP_FIJI: |
2727 | case CHIP_POLARIS10: |
2728 | case CHIP_POLARIS11: |
2729 | case CHIP_POLARIS12: |
2730 | case CHIP_VEGAM: |
2731 | case CHIP_CARRIZO: |
2732 | case CHIP_STONEY: |
2733 | if (adev->flags & AMD_IS_APU) |
2734 | adev->family = AMDGPU_FAMILY_CZ; |
2735 | else |
2736 | adev->family = AMDGPU_FAMILY_VI; |
2737 | |
2738 | r = vi_set_ip_blocks(adev); |
2739 | if (r) |
2740 | return r; |
2741 | break; |
2742 | default: |
2743 | r = amdgpu_discovery_set_ip_blocks(adev); |
2744 | if (r) |
2745 | return r; |
2746 | break; |
2747 | } |
2748 | |
2749 | /* Check for IP version 9.4.3 with A0 hardware */ |
	if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) &&
2751 | !amdgpu_device_get_rev_id(adev)) { |
2752 | dev_err(adev->dev, "Unsupported A0 hardware\n"); |
2753 | return -ENODEV; /* device unsupported - no device error */ |
2754 | } |
2755 | |
2756 | if (amdgpu_has_atpx() && |
2757 | (amdgpu_is_atpx_hybrid() || |
2758 | amdgpu_has_atpx_dgpu_power_cntl()) && |
2759 | ((adev->flags & AMD_IS_APU) == 0) && |
	    !dev_is_removable(&adev->pdev->dev))
2761 | adev->flags |= AMD_IS_PX; |
2762 | |
2763 | if (!(adev->flags & AMD_IS_APU)) { |
		parent = pcie_find_root_port(adev->pdev);
		adev->has_pr3 = parent ? pci_pr3_present(parent) : false;
2766 | } |
2767 | |
2768 | adev->pm.pp_feature = amdgpu_pp_feature_mask; |
2769 | if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) |
2770 | adev->pm.pp_feature &= ~PP_GFXOFF_MASK; |
2771 | if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID) |
2772 | adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK; |
2773 | if (!amdgpu_device_pcie_dynamic_switching_supported(adev)) |
2774 | adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK; |
2775 | |
2776 | total = true; |
2777 | for (i = 0; i < adev->num_ip_blocks; i++) { |
2778 | ip_block = &adev->ip_blocks[i]; |
2779 | |
2780 | if ((amdgpu_ip_block_mask & (1 << i)) == 0) { |
2781 | DRM_WARN("disabled ip block: %d <%s>\n", |
2782 | i, adev->ip_blocks[i].version->funcs->name); |
2783 | adev->ip_blocks[i].status.valid = false; |
2784 | } else if (ip_block->version->funcs->early_init) { |
2785 | r = ip_block->version->funcs->early_init(ip_block); |
2786 | if (r == -ENOENT) { |
2787 | adev->ip_blocks[i].status.valid = false; |
2788 | } else if (r) { |
2789 | DRM_ERROR("early_init of IP block <%s> failed %d\n", |
2790 | adev->ip_blocks[i].version->funcs->name, r); |
2791 | total = false; |
2792 | } else { |
2793 | adev->ip_blocks[i].status.valid = true; |
2794 | } |
2795 | } else { |
2796 | adev->ip_blocks[i].status.valid = true; |
2797 | } |
2798 | /* get the vbios after the asic_funcs are set up */ |
2799 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { |
2800 | r = amdgpu_device_parse_gpu_info_fw(adev); |
2801 | if (r) |
2802 | return r; |
2803 | |
2804 | bios_flags = amdgpu_device_get_vbios_flags(adev); |
2805 | skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP); |
2806 | /* Read BIOS */ |
2807 | if (!skip_bios) { |
2808 | bool optional = |
2809 | !!(bios_flags & AMDGPU_VBIOS_OPTIONAL); |
2810 | if (!amdgpu_get_bios(adev) && !optional) |
2811 | return -EINVAL; |
2812 | |
2813 | if (optional && !adev->bios) |
2814 | dev_info( |
2815 | adev->dev, |
2816 | "VBIOS image optional, proceeding without VBIOS image"); |
2817 | |
2818 | if (adev->bios) { |
2819 | r = amdgpu_atombios_init(adev); |
2820 | if (r) { |
2821 | dev_err(adev->dev, |
2822 | "amdgpu_atombios_init failed\n"); |
2823 | amdgpu_vf_error_put( |
2824 | adev, |
						AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
						0, 0);
2827 | return r; |
2828 | } |
2829 | } |
2830 | } |
2831 | |
			/* get pf2vf msg info at its earliest time */
2833 | if (amdgpu_sriov_vf(adev)) |
2834 | amdgpu_virt_init_data_exchange(adev); |
2835 | |
2836 | } |
2837 | } |
2838 | if (!total) |
2839 | return -ENODEV; |
2840 | |
2841 | if (adev->gmc.xgmi.supported) |
2842 | amdgpu_xgmi_early_init(adev); |
2843 | |
	ip_block = amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
2845 | if (ip_block->status.valid != false) |
2846 | amdgpu_amdkfd_device_probe(adev); |
2847 | |
2848 | adev->cg_flags &= amdgpu_cg_mask; |
2849 | adev->pg_flags &= amdgpu_pg_mask; |
2850 | |
2851 | return 0; |
2852 | } |
2853 | |
2854 | static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) |
2855 | { |
2856 | int i, r; |
2857 | |
2858 | for (i = 0; i < adev->num_ip_blocks; i++) { |
2859 | if (!adev->ip_blocks[i].status.sw) |
2860 | continue; |
2861 | if (adev->ip_blocks[i].status.hw) |
2862 | continue; |
		if (!amdgpu_ip_member_of_hwini(
			    adev, adev->ip_blocks[i].version->type))
2865 | continue; |
2866 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || |
2867 | (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || |
2868 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { |
2869 | r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); |
2870 | if (r) { |
2871 | DRM_ERROR("hw_init of IP block <%s> failed %d\n", |
2872 | adev->ip_blocks[i].version->funcs->name, r); |
2873 | return r; |
2874 | } |
2875 | adev->ip_blocks[i].status.hw = true; |
2876 | } |
2877 | } |
2878 | |
2879 | return 0; |
2880 | } |
2881 | |
2882 | static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) |
2883 | { |
2884 | int i, r; |
2885 | |
2886 | for (i = 0; i < adev->num_ip_blocks; i++) { |
2887 | if (!adev->ip_blocks[i].status.sw) |
2888 | continue; |
2889 | if (adev->ip_blocks[i].status.hw) |
2890 | continue; |
		if (!amdgpu_ip_member_of_hwini(
			    adev, adev->ip_blocks[i].version->type))
2893 | continue; |
2894 | r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); |
2895 | if (r) { |
2896 | DRM_ERROR("hw_init of IP block <%s> failed %d\n", |
2897 | adev->ip_blocks[i].version->funcs->name, r); |
2898 | return r; |
2899 | } |
2900 | adev->ip_blocks[i].status.hw = true; |
2901 | } |
2902 | |
2903 | return 0; |
2904 | } |
2905 | |
2906 | static int amdgpu_device_fw_loading(struct amdgpu_device *adev) |
2907 | { |
2908 | int r = 0; |
2909 | int i; |
2910 | uint32_t smu_version; |
2911 | |
2912 | if (adev->asic_type >= CHIP_VEGA10) { |
2913 | for (i = 0; i < adev->num_ip_blocks; i++) { |
2914 | if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) |
2915 | continue; |
2916 | |
			if (!amdgpu_ip_member_of_hwini(adev,
						       AMD_IP_BLOCK_TYPE_PSP))
2919 | break; |
2920 | |
2921 | if (!adev->ip_blocks[i].status.sw) |
2922 | continue; |
2923 | |
2924 | /* no need to do the fw loading again if already done*/ |
2925 | if (adev->ip_blocks[i].status.hw == true) |
2926 | break; |
2927 | |
2928 | if (amdgpu_in_reset(adev) || adev->in_suspend) { |
				r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
2930 | if (r) |
2931 | return r; |
2932 | } else { |
2933 | r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); |
2934 | if (r) { |
2935 | DRM_ERROR("hw_init of IP block <%s> failed %d\n", |
2936 | adev->ip_blocks[i].version->funcs->name, r); |
2937 | return r; |
2938 | } |
2939 | adev->ip_blocks[i].status.hw = true; |
2940 | } |
2941 | break; |
2942 | } |
2943 | } |
2944 | |
2945 | if (!amdgpu_sriov_vf(adev) || adev->asic_type == CHIP_TONGA) |
		r = amdgpu_pm_load_smu_firmware(adev, &smu_version);
2947 | |
2948 | return r; |
2949 | } |
2950 | |
2951 | static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) |
2952 | { |
2953 | struct drm_sched_init_args args = { |
2954 | .ops = &amdgpu_sched_ops, |
2955 | .num_rqs = DRM_SCHED_PRIORITY_COUNT, |
2956 | .timeout_wq = adev->reset_domain->wq, |
2957 | .dev = adev->dev, |
2958 | }; |
2959 | long timeout; |
2960 | int r, i; |
2961 | |
2962 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
2963 | struct amdgpu_ring *ring = adev->rings[i]; |
2964 | |
2965 | /* No need to setup the GPU scheduler for rings that don't need it */ |
2966 | if (!ring || ring->no_scheduler) |
2967 | continue; |
2968 | |
2969 | switch (ring->funcs->type) { |
2970 | case AMDGPU_RING_TYPE_GFX: |
2971 | timeout = adev->gfx_timeout; |
2972 | break; |
2973 | case AMDGPU_RING_TYPE_COMPUTE: |
2974 | timeout = adev->compute_timeout; |
2975 | break; |
2976 | case AMDGPU_RING_TYPE_SDMA: |
2977 | timeout = adev->sdma_timeout; |
2978 | break; |
2979 | default: |
2980 | timeout = adev->video_timeout; |
2981 | break; |
2982 | } |
2983 | |
2984 | args.timeout = timeout; |
2985 | args.credit_limit = ring->num_hw_submission; |
2986 | args.score = ring->sched_score; |
2987 | args.name = ring->name; |
2988 | |
		r = drm_sched_init(&ring->sched, &args);
2990 | if (r) { |
2991 | DRM_ERROR("Failed to create scheduler on ring %s.\n", |
2992 | ring->name); |
2993 | return r; |
2994 | } |
2995 | r = amdgpu_uvd_entity_init(adev, ring); |
2996 | if (r) { |
2997 | DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n", |
2998 | ring->name); |
2999 | return r; |
3000 | } |
3001 | r = amdgpu_vce_entity_init(adev, ring); |
3002 | if (r) { |
3003 | DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n", |
3004 | ring->name); |
3005 | return r; |
3006 | } |
3007 | } |
3008 | |
3009 | amdgpu_xcp_update_partition_sched_list(adev); |
3010 | |
3011 | return 0; |
3012 | } |
3013 | |
3014 | |
3015 | /** |
3016 | * amdgpu_device_ip_init - run init for hardware IPs |
3017 | * |
3018 | * @adev: amdgpu_device pointer |
3019 | * |
3020 | * Main initialization pass for hardware IPs. The list of all the hardware |
3021 | * IPs that make up the asic is walked and the sw_init and hw_init callbacks |
3022 | * are run. sw_init initializes the software state associated with each IP |
3023 | * and hw_init initializes the hardware associated with each IP. |
3024 | * Returns 0 on success, negative error code on failure. |
3025 | */ |
3026 | static int amdgpu_device_ip_init(struct amdgpu_device *adev) |
3027 | { |
3028 | bool init_badpage; |
3029 | int i, r; |
3030 | |
3031 | r = amdgpu_ras_init(adev); |
3032 | if (r) |
3033 | return r; |
3034 | |
3035 | for (i = 0; i < adev->num_ip_blocks; i++) { |
3036 | if (!adev->ip_blocks[i].status.valid) |
3037 | continue; |
3038 | if (adev->ip_blocks[i].version->funcs->sw_init) { |
3039 | r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]); |
3040 | if (r) { |
3041 | DRM_ERROR("sw_init of IP block <%s> failed %d\n", |
3042 | adev->ip_blocks[i].version->funcs->name, r); |
3043 | goto init_failed; |
3044 | } |
3045 | } |
3046 | adev->ip_blocks[i].status.sw = true; |
3047 | |
		if (!amdgpu_ip_member_of_hwini(
			    adev, adev->ip_blocks[i].version->type))
3050 | continue; |
3051 | |
3052 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { |
3053 | /* need to do common hw init early so everything is set up for gmc */ |
3054 | r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); |
3055 | if (r) { |
3056 | DRM_ERROR("hw_init %d failed %d\n", i, r); |
3057 | goto init_failed; |
3058 | } |
3059 | adev->ip_blocks[i].status.hw = true; |
3060 | } else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { |
3061 | /* need to do gmc hw init early so we can allocate gpu mem */ |
3062 | /* Try to reserve bad pages early */ |
3063 | if (amdgpu_sriov_vf(adev)) |
3064 | amdgpu_virt_exchange_data(adev); |
3065 | |
3066 | r = amdgpu_device_mem_scratch_init(adev); |
3067 | if (r) { |
3068 | DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r); |
3069 | goto init_failed; |
3070 | } |
3071 | r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]); |
3072 | if (r) { |
3073 | DRM_ERROR("hw_init %d failed %d\n", i, r); |
3074 | goto init_failed; |
3075 | } |
3076 | r = amdgpu_device_wb_init(adev); |
3077 | if (r) { |
3078 | DRM_ERROR("amdgpu_device_wb_init failed %d\n", r); |
3079 | goto init_failed; |
3080 | } |
3081 | adev->ip_blocks[i].status.hw = true; |
3082 | |
3083 | /* right after GMC hw init, we create CSA */ |
3084 | if (adev->gfx.mcbp) { |
				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
3086 | AMDGPU_GEM_DOMAIN_VRAM | |
3087 | AMDGPU_GEM_DOMAIN_GTT, |
3088 | AMDGPU_CSA_SIZE); |
3089 | if (r) { |
3090 | DRM_ERROR("allocate CSA failed %d\n", r); |
3091 | goto init_failed; |
3092 | } |
3093 | } |
3094 | |
3095 | r = amdgpu_seq64_init(adev); |
3096 | if (r) { |
3097 | DRM_ERROR("allocate seq64 failed %d\n", r); |
3098 | goto init_failed; |
3099 | } |
3100 | } |
3101 | } |
3102 | |
3103 | if (amdgpu_sriov_vf(adev)) |
3104 | amdgpu_virt_init_data_exchange(adev); |
3105 | |
3106 | r = amdgpu_ib_pool_init(adev); |
3107 | if (r) { |
3108 | dev_err(adev->dev, "IB initialization failed (%d).\n", r); |
		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_IB_INIT_FAIL, 0, r);
3110 | goto init_failed; |
3111 | } |
3112 | |
3113 | r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ |
3114 | if (r) |
3115 | goto init_failed; |
3116 | |
3117 | r = amdgpu_device_ip_hw_init_phase1(adev); |
3118 | if (r) |
3119 | goto init_failed; |
3120 | |
3121 | r = amdgpu_device_fw_loading(adev); |
3122 | if (r) |
3123 | goto init_failed; |
3124 | |
3125 | r = amdgpu_device_ip_hw_init_phase2(adev); |
3126 | if (r) |
3127 | goto init_failed; |
3128 | |
3129 | /* |
3130 | * retired pages will be loaded from eeprom and reserved here, |
3131 | * it should be called after amdgpu_device_ip_hw_init_phase2 since |
	 * for some ASICs the RAS EEPROM code relies on the SMU being fully
	 * functional for I2C communication, which is only true at this point.
	 *
	 * amdgpu_ras_recovery_init may fail, but the upper layer only cares
	 * about a failure caused by a bad gpu situation and stops the amdgpu
	 * init process accordingly. For other failure cases, it still releases
	 * all the resources and prints an error message, rather than returning
	 * a negative value to the upper level.
3140 | * |
3141 | * Note: theoretically, this should be called before all vram allocations |
	 * to protect retired pages from being reused
3143 | */ |
3144 | init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI); |
	r = amdgpu_ras_recovery_init(adev, init_badpage);
3146 | if (r) |
3147 | goto init_failed; |
3148 | |
3149 | /** |
3150 | * In case of XGMI grab extra reference for reset domain for this device |
3151 | */ |
3152 | if (adev->gmc.xgmi.num_physical_nodes > 1) { |
3153 | if (amdgpu_xgmi_add_device(adev) == 0) { |
3154 | if (!amdgpu_sriov_vf(adev)) { |
3155 | struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); |
3156 | |
3157 | if (WARN_ON(!hive)) { |
3158 | r = -ENOENT; |
3159 | goto init_failed; |
3160 | } |
3161 | |
3162 | if (!hive->reset_domain || |
				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
3164 | r = -ENOENT; |
3165 | amdgpu_put_xgmi_hive(hive); |
3166 | goto init_failed; |
3167 | } |
3168 | |
3169 | /* Drop the early temporary reset domain we created for device */ |
				amdgpu_reset_put_reset_domain(adev->reset_domain);
3171 | adev->reset_domain = hive->reset_domain; |
3172 | amdgpu_put_xgmi_hive(hive); |
3173 | } |
3174 | } |
3175 | } |
3176 | |
3177 | r = amdgpu_device_init_schedulers(adev); |
3178 | if (r) |
3179 | goto init_failed; |
3180 | |
3181 | if (adev->mman.buffer_funcs_ring->sched.ready) |
		amdgpu_ttm_set_buffer_funcs_status(adev, true);
3183 | |
3184 | /* Don't init kfd if whole hive need to be reset during init */ |
3185 | if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) { |
3186 | kgd2kfd_init_zone_device(adev); |
3187 | amdgpu_amdkfd_device_init(adev); |
3188 | } |
3189 | |
3190 | amdgpu_fru_get_product_info(adev); |
3191 | |
3192 | if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev)) |
3193 | r = amdgpu_cper_init(adev); |
3194 | |
3195 | init_failed: |
3196 | |
3197 | return r; |
3198 | } |
3199 | |
3200 | /** |
3201 | * amdgpu_device_fill_reset_magic - writes reset magic to gart pointer |
3202 | * |
3203 | * @adev: amdgpu_device pointer |
3204 | * |
3205 | * Writes a reset magic value to the gart pointer in VRAM. The driver calls |
3206 | * this function before a GPU reset. If the value is retained after a |
3207 | * GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents. |
3208 | */ |
3209 | static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev) |
3210 | { |
3211 | memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM); |
3212 | } |
3213 | |
3214 | /** |
3215 | * amdgpu_device_check_vram_lost - check if vram is valid |
3216 | * |
3217 | * @adev: amdgpu_device pointer |
3218 | * |
3219 | * Checks the reset magic value written to the gart pointer in VRAM. |
 * The driver calls this after a GPU reset to see if the contents of
 * VRAM are lost or not.
3222 | * returns true if vram is lost, false if not. |
3223 | */ |
3224 | static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) |
3225 | { |
	if (memcmp(adev->gart.ptr, adev->reset_magic,
		   AMDGPU_RESET_MAGIC_NUM))
3228 | return true; |
3229 | |
3230 | if (!amdgpu_in_reset(adev)) |
3231 | return false; |
3232 | |
3233 | /* |
3234 | * For all ASICs with baco/mode1 reset, the VRAM is |
3235 | * always assumed to be lost. |
3236 | */ |
3237 | switch (amdgpu_asic_reset_method(adev)) { |
3238 | case AMD_RESET_METHOD_LINK: |
3239 | case AMD_RESET_METHOD_BACO: |
3240 | case AMD_RESET_METHOD_MODE1: |
3241 | return true; |
3242 | default: |
3243 | return false; |
3244 | } |
3245 | } |
3246 | |
3247 | /** |
3248 | * amdgpu_device_set_cg_state - set clockgating for amdgpu device |
3249 | * |
3250 | * @adev: amdgpu_device pointer |
3251 | * @state: clockgating state (gate or ungate) |
3252 | * |
3253 | * The list of all the hardware IPs that make up the asic is walked and the |
3254 | * set_clockgating_state callbacks are run. |
3255 | * Late initialization pass enabling clockgating for hardware IPs. |
3256 | * Fini or suspend, pass disabling clockgating for hardware IPs. |
3257 | * Returns 0 on success, negative error code on failure. |
3258 | */ |
3259 | |
3260 | int amdgpu_device_set_cg_state(struct amdgpu_device *adev, |
3261 | enum amd_clockgating_state state) |
3262 | { |
3263 | int i, j, r; |
3264 | |
3265 | if (amdgpu_emu_mode == 1) |
3266 | return 0; |
3267 | |
3268 | for (j = 0; j < adev->num_ip_blocks; j++) { |
3269 | i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; |
3270 | if (!adev->ip_blocks[i].status.late_initialized) |
3271 | continue; |
3272 | /* skip CG for GFX, SDMA on S0ix */ |
3273 | if (adev->in_s0ix && |
3274 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || |
3275 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) |
3276 | continue; |
3277 | /* skip CG for VCE/UVD, it's handled specially */ |
3278 | if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && |
3279 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && |
3280 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && |
3281 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && |
3282 | adev->ip_blocks[i].version->funcs->set_clockgating_state) { |
3283 | /* enable clockgating to save power */ |
3284 | r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i], |
3285 | state); |
3286 | if (r) { |
3287 | DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", |
3288 | adev->ip_blocks[i].version->funcs->name, r); |
3289 | return r; |
3290 | } |
3291 | } |
3292 | } |
3293 | |
3294 | return 0; |
3295 | } |
3296 | |
3297 | int amdgpu_device_set_pg_state(struct amdgpu_device *adev, |
3298 | enum amd_powergating_state state) |
3299 | { |
3300 | int i, j, r; |
3301 | |
3302 | if (amdgpu_emu_mode == 1) |
3303 | return 0; |
3304 | |
3305 | for (j = 0; j < adev->num_ip_blocks; j++) { |
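     | 		/* gate in IP block order, ungate in the reverse order */ |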
3306 | i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; |
3307 | if (!adev->ip_blocks[i].status.late_initialized) |
3308 | continue; |
3309 | /* skip PG for GFX, SDMA on S0ix */ |
3310 | if (adev->in_s0ix && |
3311 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || |
3312 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) |
3313 | continue; |
3314 | 		/* skip PG for VCE/UVD, it's handled specially */ |
3315 | if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && |
3316 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && |
3317 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && |
3318 | adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG && |
3319 | adev->ip_blocks[i].version->funcs->set_powergating_state) { |
3320 | /* enable powergating to save power */ |
3321 | r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i], |
3322 | state); |
3323 | if (r) { |
3324 | DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", |
3325 | adev->ip_blocks[i].version->funcs->name, r); |
3326 | return r; |
3327 | } |
3328 | } |
3329 | } |
3330 | return 0; |
3331 | } |
3332 | |
3333 | static int amdgpu_device_enable_mgpu_fan_boost(void) |
3334 | { |
3335 | struct amdgpu_gpu_instance *gpu_ins; |
3336 | struct amdgpu_device *adev; |
3337 | int i, ret = 0; |
3338 | |
3339 | mutex_lock(&mgpu_info.mutex); |
3340 | |
3341 | /* |
3342 | * MGPU fan boost feature should be enabled |
3343 | * only when there are two or more dGPUs in |
3344 | * the system |
3345 | */ |
3346 | if (mgpu_info.num_dgpu < 2) |
3347 | goto out; |
3348 | |
3349 | for (i = 0; i < mgpu_info.num_dgpu; i++) { |
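     | 		/* only dGPU instances that have not enabled fan boost yet need the SMU call */ |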
3350 | gpu_ins = &(mgpu_info.gpu_ins[i]); |
3351 | adev = gpu_ins->adev; |
3352 | if (!(adev->flags & AMD_IS_APU) && |
3353 | !gpu_ins->mgpu_fan_enabled) { |
3354 | ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); |
3355 | if (ret) |
3356 | break; |
3357 | |
3358 | gpu_ins->mgpu_fan_enabled = 1; |
3359 | } |
3360 | } |
3361 | |
3362 | out: |
3363 | 	mutex_unlock(&mgpu_info.mutex); |
3364 | |
3365 | return ret; |
3366 | } |
3367 | |
3368 | /** |
3369 | * amdgpu_device_ip_late_init - run late init for hardware IPs |
3370 | * |
3371 | * @adev: amdgpu_device pointer |
3372 | * |
3373 | * Late initialization pass for hardware IPs. The list of all the hardware |
3374 | * IPs that make up the asic is walked and the late_init callbacks are run. |
3375 |  * late_init covers any special initialization that an IP requires |
3376 |  * after all of the IP blocks have been initialized or anything that needs |
3377 |  * to happen late in the init process. |
3378 | * Returns 0 on success, negative error code on failure. |
3379 | */ |
3380 | static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) |
3381 | { |
3382 | struct amdgpu_gpu_instance *gpu_instance; |
3383 | int i = 0, r; |
3384 | |
3385 | for (i = 0; i < adev->num_ip_blocks; i++) { |
3386 | if (!adev->ip_blocks[i].status.hw) |
3387 | continue; |
3388 | if (adev->ip_blocks[i].version->funcs->late_init) { |
3389 | r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]); |
3390 | if (r) { |
3391 | DRM_ERROR("late_init of IP block <%s> failed %d\n", |
3392 | adev->ip_blocks[i].version->funcs->name, r); |
3393 | return r; |
3394 | } |
3395 | } |
3396 | adev->ip_blocks[i].status.late_initialized = true; |
3397 | } |
3398 | |
3399 | r = amdgpu_ras_late_init(adev); |
3400 | if (r) { |
3401 | DRM_ERROR("amdgpu_ras_late_init failed %d", r); |
3402 | return r; |
3403 | } |
3404 | |
3405 | if (!amdgpu_reset_in_recovery(adev)) |
3406 | 		amdgpu_ras_set_error_query_ready(adev, true); |
3407 |  |
3408 | 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); |
3409 | 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); |
3410 | |
3411 | amdgpu_device_fill_reset_magic(adev); |
3412 | |
3413 | r = amdgpu_device_enable_mgpu_fan_boost(); |
3414 | if (r) |
3415 | DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); |
3416 | |
3417 | 	/* For passthrough configurations on arcturus and aldebaran, enable special SBR handling */ |
3418 | 	if (amdgpu_passthrough(adev) && |
3419 | 	    ((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > 1) || |
3420 | 	    adev->asic_type == CHIP_ALDEBARAN)) |
3421 | 		amdgpu_dpm_handle_passthrough_sbr(adev, true); |
3422 | |
3423 | if (adev->gmc.xgmi.num_physical_nodes > 1) { |
3424 | mutex_lock(&mgpu_info.mutex); |
3425 | |
3426 | /* |
3427 | * Reset device p-state to low as this was booted with high. |
3428 | * |
3429 | * This should be performed only after all devices from the same |
3430 | * hive get initialized. |
3431 | * |
3432 | 		 * However, the number of devices in the hive is not known in |
3433 | 		 * advance; it is counted one by one as each device initializes. |
3434 | 		 * |
3435 | 		 * So we wait until all XGMI interlinked devices have initialized. |
3436 | 		 * This may add some delay as those devices may come from |
3437 | 		 * different hives, but that should be OK. |
3438 | */ |
3439 | if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) { |
3440 | for (i = 0; i < mgpu_info.num_gpu; i++) { |
3441 | gpu_instance = &(mgpu_info.gpu_ins[i]); |
3442 | if (gpu_instance->adev->flags & AMD_IS_APU) |
3443 | continue; |
3444 | |
3445 | 				r = amdgpu_xgmi_set_pstate(gpu_instance->adev, |
3446 | 						AMDGPU_XGMI_PSTATE_MIN); |
3447 | if (r) { |
3448 | DRM_ERROR("pstate setting failed (%d).\n", r); |
3449 | break; |
3450 | } |
3451 | } |
3452 | } |
3453 | |
3454 | 		mutex_unlock(&mgpu_info.mutex); |
3455 | } |
3456 | |
3457 | return 0; |
3458 | } |
3459 | |
3460 | static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block) |
3461 | { |
3462 | int r; |
3463 | |
3464 | if (!ip_block->version->funcs->hw_fini) { |
3465 | DRM_ERROR("hw_fini of IP block <%s> not defined\n", |
3466 | ip_block->version->funcs->name); |
3467 | } else { |
3468 | r = ip_block->version->funcs->hw_fini(ip_block); |
3469 | /* XXX handle errors */ |
3470 | if (r) { |
3471 | DRM_DEBUG("hw_fini of IP block <%s> failed %d\n", |
3472 | ip_block->version->funcs->name, r); |
3473 | } |
3474 | } |
3475 | |
3476 | ip_block->status.hw = false; |
3477 | } |
3478 | |
3479 | /** |
3480 | * amdgpu_device_smu_fini_early - smu hw_fini wrapper |
3481 | * |
3482 | * @adev: amdgpu_device pointer |
3483 | * |
3484 |  * For ASICs that need to disable the SMC first |
3485 | */ |
3486 | static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev) |
3487 | { |
3488 | int i; |
3489 | |
3490 | 	if (amdgpu_ip_version(adev, GC_HWIP, 0) > IP_VERSION(9, 0, 0)) |
3491 | return; |
3492 | |
3493 | for (i = 0; i < adev->num_ip_blocks; i++) { |
3494 | if (!adev->ip_blocks[i].status.hw) |
3495 | continue; |
3496 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { |
3497 | 			amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]); |
3498 | break; |
3499 | } |
3500 | } |
3501 | } |
3502 | |
3503 | static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev) |
3504 | { |
3505 | int i, r; |
3506 | |
3507 | for (i = 0; i < adev->num_ip_blocks; i++) { |
3508 | if (!adev->ip_blocks[i].version->funcs->early_fini) |
3509 | continue; |
3510 | |
3511 | r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]); |
3512 | if (r) { |
3513 | DRM_DEBUG("early_fini of IP block <%s> failed %d\n", |
3514 | adev->ip_blocks[i].version->funcs->name, r); |
3515 | } |
3516 | } |
3517 | |
3518 | 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); |
3519 | 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); |
3520 |  |
3521 | 	amdgpu_amdkfd_suspend(adev, false); |
3522 | amdgpu_userq_suspend(adev); |
3523 | |
3524 | 	/* Workaround for ASICs that need to disable the SMC first */ |
3525 | amdgpu_device_smu_fini_early(adev); |
3526 | |
3527 | for (i = adev->num_ip_blocks - 1; i >= 0; i--) { |
3528 | if (!adev->ip_blocks[i].status.hw) |
3529 | continue; |
3530 | |
3531 | 		amdgpu_ip_block_hw_fini(&adev->ip_blocks[i]); |
3532 | } |
3533 | |
3534 | if (amdgpu_sriov_vf(adev)) { |
3535 | 		if (amdgpu_virt_release_full_gpu(adev, false)) |
3536 | DRM_ERROR("failed to release exclusive mode on fini\n"); |
3537 | } |
3538 | |
3539 | return 0; |
3540 | } |
3541 | |
3542 | /** |
3543 | * amdgpu_device_ip_fini - run fini for hardware IPs |
3544 | * |
3545 | * @adev: amdgpu_device pointer |
3546 | * |
3547 | * Main teardown pass for hardware IPs. The list of all the hardware |
3548 | * IPs that make up the asic is walked and the hw_fini and sw_fini callbacks |
3549 | * are run. hw_fini tears down the hardware associated with each IP |
3550 | * and sw_fini tears down any software state associated with each IP. |
3551 | * Returns 0 on success, negative error code on failure. |
3552 | */ |
3553 | static int amdgpu_device_ip_fini(struct amdgpu_device *adev) |
3554 | { |
3555 | int i, r; |
3556 | |
3557 | amdgpu_cper_fini(adev); |
3558 | |
3559 | if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done) |
3560 | amdgpu_virt_release_ras_err_handler_data(adev); |
3561 | |
3562 | if (adev->gmc.xgmi.num_physical_nodes > 1) |
3563 | amdgpu_xgmi_remove_device(adev); |
3564 | |
3565 | amdgpu_amdkfd_device_fini_sw(adev); |
3566 | |
3567 | for (i = adev->num_ip_blocks - 1; i >= 0; i--) { |
3568 | if (!adev->ip_blocks[i].status.sw) |
3569 | continue; |
3570 | |
3571 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { |
3572 | amdgpu_ucode_free_bo(adev); |
3573 | 			amdgpu_free_static_csa(&adev->virt.csa_obj); |
3574 | amdgpu_device_wb_fini(adev); |
3575 | amdgpu_device_mem_scratch_fini(adev); |
3576 | amdgpu_ib_pool_fini(adev); |
3577 | amdgpu_seq64_fini(adev); |
3578 | amdgpu_doorbell_fini(adev); |
3579 | } |
3580 | if (adev->ip_blocks[i].version->funcs->sw_fini) { |
3581 | r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]); |
3582 | /* XXX handle errors */ |
3583 | if (r) { |
3584 | DRM_DEBUG("sw_fini of IP block <%s> failed %d\n", |
3585 | adev->ip_blocks[i].version->funcs->name, r); |
3586 | } |
3587 | } |
3588 | adev->ip_blocks[i].status.sw = false; |
3589 | adev->ip_blocks[i].status.valid = false; |
3590 | } |
3591 | |
3592 | for (i = adev->num_ip_blocks - 1; i >= 0; i--) { |
3593 | if (!adev->ip_blocks[i].status.late_initialized) |
3594 | continue; |
3595 | if (adev->ip_blocks[i].version->funcs->late_fini) |
3596 | adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]); |
3597 | adev->ip_blocks[i].status.late_initialized = false; |
3598 | } |
3599 | |
3600 | amdgpu_ras_fini(adev); |
3601 | |
3602 | return 0; |
3603 | } |
3604 | |
3605 | /** |
3606 | * amdgpu_device_delayed_init_work_handler - work handler for IB tests |
3607 | * |
3608 | * @work: work_struct. |
3609 | */ |
3610 | static void amdgpu_device_delayed_init_work_handler(struct work_struct *work) |
3611 | { |
3612 | struct amdgpu_device *adev = |
3613 | container_of(work, struct amdgpu_device, delayed_init_work.work); |
3614 | int r; |
3615 | |
3616 | r = amdgpu_ib_ring_tests(adev); |
3617 | if (r) |
3618 | DRM_ERROR("ib ring test failed (%d).\n", r); |
3619 | } |
3620 | |
3621 | static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) |
3622 | { |
3623 | struct amdgpu_device *adev = |
3624 | container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); |
3625 | |
3626 | WARN_ON_ONCE(adev->gfx.gfx_off_state); |
3627 | WARN_ON_ONCE(adev->gfx.gfx_off_req_count); |
3628 | |
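     | 	/* only record GFXOFF as enabled if the SMU actually accepted the request */ |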
3629 | 	if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true, 0)) |
3630 | adev->gfx.gfx_off_state = true; |
3631 | } |
3632 | |
3633 | /** |
3634 | * amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1) |
3635 | * |
3636 | * @adev: amdgpu_device pointer |
3637 | * |
3638 | * Main suspend function for hardware IPs. The list of all the hardware |
3639 | * IPs that make up the asic is walked, clockgating is disabled and the |
3640 | * suspend callbacks are run. suspend puts the hardware and software state |
3641 | * in each IP into a state suitable for suspend. |
3642 | * Returns 0 on success, negative error code on failure. |
3643 | */ |
3644 | static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) |
3645 | { |
3646 | int i, r; |
3647 | |
3648 | 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); |
3649 | 	amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); |
3650 | |
3651 | 	/* |
3652 | 	 * Per the PMFW team's suggestion, the driver needs to disable the |
3653 | 	 * gfxoff and DF cstate features for GPU reset (e.g. Mode1 reset) |
3654 | 	 * scenarios. Add the missing DF cstate disablement here. |
3655 | 	 */ |
3656 | 	if (amdgpu_dpm_set_df_cstate(adev, DF_CSTATE_DISALLOW)) |
3657 | dev_warn(adev->dev, "Failed to disallow df cstate"); |
3658 | |
3659 | for (i = adev->num_ip_blocks - 1; i >= 0; i--) { |
3660 | if (!adev->ip_blocks[i].status.valid) |
3661 | continue; |
3662 | |
3663 | /* displays are handled separately */ |
3664 | if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE) |
3665 | continue; |
3666 | |
3667 | /* XXX handle errors */ |
3668 | 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]); |
3669 | if (r) |
3670 | return r; |
3671 | } |
3672 | |
3673 | return 0; |
3674 | } |
3675 | |
3676 | /** |
3677 | * amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2) |
3678 | * |
3679 | * @adev: amdgpu_device pointer |
3680 | * |
3681 | * Main suspend function for hardware IPs. The list of all the hardware |
3682 | * IPs that make up the asic is walked, clockgating is disabled and the |
3683 | * suspend callbacks are run. suspend puts the hardware and software state |
3684 | * in each IP into a state suitable for suspend. |
3685 | * Returns 0 on success, negative error code on failure. |
3686 | */ |
3687 | static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) |
3688 | { |
3689 | int i, r; |
3690 | |
3691 | if (adev->in_s0ix) |
3692 | 		amdgpu_dpm_gfx_state_change(adev, sGpuChangeState_D3Entry); |
3693 | |
3694 | for (i = adev->num_ip_blocks - 1; i >= 0; i--) { |
3695 | if (!adev->ip_blocks[i].status.valid) |
3696 | continue; |
3697 | /* displays are handled in phase1 */ |
3698 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) |
3699 | continue; |
3700 | /* PSP lost connection when err_event_athub occurs */ |
3701 | if (amdgpu_ras_intr_triggered() && |
3702 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { |
3703 | adev->ip_blocks[i].status.hw = false; |
3704 | continue; |
3705 | } |
3706 | |
3707 | 		/* skip unnecessary suspend if we have not initialized them yet */ |
3708 | 		if (!amdgpu_ip_member_of_hwini( |
3709 | 			    adev, adev->ip_blocks[i].version->type)) |
3710 | continue; |
3711 | |
3712 | /* Since we skip suspend for S0i3, we need to cancel the delayed |
3713 | * idle work here as the suspend callback never gets called. |
3714 | */ |
3715 | if (adev->in_s0ix && |
3716 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX && |
3717 | 		    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 0, 0)) |
3718 | 			cancel_delayed_work_sync(&adev->gfx.idle_work); |
3719 | /* skip suspend of gfx/mes and psp for S0ix |
3720 | * gfx is in gfxoff state, so on resume it will exit gfxoff just |
3721 | * like at runtime. PSP is also part of the always on hardware |
3722 | * so no need to suspend it. |
3723 | */ |
3724 | if (adev->in_s0ix && |
3725 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP || |
3726 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX || |
3727 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES)) |
3728 | continue; |
3729 | |
3730 | /* SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF */ |
3731 | if (adev->in_s0ix && |
3732 | 		    (amdgpu_ip_version(adev, SDMA0_HWIP, 0) >= |
3733 | IP_VERSION(5, 0, 0)) && |
3734 | (adev->ip_blocks[i].version->type == |
3735 | AMD_IP_BLOCK_TYPE_SDMA)) |
3736 | continue; |
3737 | |
3738 | 		/* Once swPSP has provided the IMU and RLC FW binaries to the TOS during |
3739 | 		 * cold boot, they reside in the TMR and are expected to be reused by |
3740 | 		 * PSP-TOS to reload from that location; RLC autoload is also triggered |
3741 | 		 * from there based on the PMFW -> PSP message during the re-init sequence. |
3742 | 		 * Therefore, PSP suspend & resume should be skipped on IMU-enabled APU |
3743 | 		 * ASICs to avoid destroying the TMR and reloading the firmware again. |
3744 | 		 */ |
3745 | if (amdgpu_in_reset(adev) && |
3746 | (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs && |
3747 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) |
3748 | continue; |
3749 | |
3750 | /* XXX handle errors */ |
3751 | 		r = amdgpu_ip_block_suspend(&adev->ip_blocks[i]); |
3752 | adev->ip_blocks[i].status.hw = false; |
3753 | |
3754 | /* handle putting the SMC in the appropriate state */ |
3755 | if (!amdgpu_sriov_vf(adev)) { |
3756 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { |
3757 | 				r = amdgpu_dpm_set_mp1_state(adev, adev->mp1_state); |
3758 | if (r) { |
3759 | DRM_ERROR("SMC failed to set mp1 state %d, %d\n", |
3760 | adev->mp1_state, r); |
3761 | return r; |
3762 | } |
3763 | } |
3764 | } |
3765 | } |
3766 | |
3767 | return 0; |
3768 | } |
3769 | |
3770 | /** |
3771 | * amdgpu_device_ip_suspend - run suspend for hardware IPs |
3772 | * |
3773 | * @adev: amdgpu_device pointer |
3774 | * |
3775 | * Main suspend function for hardware IPs. The list of all the hardware |
3776 | * IPs that make up the asic is walked, clockgating is disabled and the |
3777 | * suspend callbacks are run. suspend puts the hardware and software state |
3778 | * in each IP into a state suitable for suspend. |
3779 | * Returns 0 on success, negative error code on failure. |
3780 | */ |
3781 | int amdgpu_device_ip_suspend(struct amdgpu_device *adev) |
3782 | { |
3783 | int r; |
3784 | |
3785 | if (amdgpu_sriov_vf(adev)) { |
3786 | amdgpu_virt_fini_data_exchange(adev); |
3787 | 		amdgpu_virt_request_full_gpu(adev, false); |
3788 | 	} |
3789 |  |
3790 | 	amdgpu_ttm_set_buffer_funcs_status(adev, false); |
3791 | |
3792 | r = amdgpu_device_ip_suspend_phase1(adev); |
3793 | if (r) |
3794 | return r; |
3795 | r = amdgpu_device_ip_suspend_phase2(adev); |
3796 | |
3797 | if (amdgpu_sriov_vf(adev)) |
3798 | 		amdgpu_virt_release_full_gpu(adev, false); |
3799 | |
3800 | return r; |
3801 | } |
3802 | |
3803 | static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) |
3804 | { |
3805 | int i, r; |
3806 | |
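     | 	/* minimal bring-up order used right after a VF reset: base block, memory controller, PSP, interrupts */ |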
3807 | static enum amd_ip_block_type ip_order[] = { |
3808 | AMD_IP_BLOCK_TYPE_COMMON, |
3809 | AMD_IP_BLOCK_TYPE_GMC, |
3810 | AMD_IP_BLOCK_TYPE_PSP, |
3811 | AMD_IP_BLOCK_TYPE_IH, |
3812 | }; |
3813 | |
3814 | for (i = 0; i < adev->num_ip_blocks; i++) { |
3815 | int j; |
3816 | struct amdgpu_ip_block *block; |
3817 | |
3818 | block = &adev->ip_blocks[i]; |
3819 | block->status.hw = false; |
3820 | |
3821 | for (j = 0; j < ARRAY_SIZE(ip_order); j++) { |
3822 | |
3823 | if (block->version->type != ip_order[j] || |
3824 | !block->status.valid) |
3825 | continue; |
3826 | |
3827 | r = block->version->funcs->hw_init(&adev->ip_blocks[i]); |
3828 | if (r) { |
3829 | dev_err(adev->dev, "RE-INIT-early: %s failed\n", |
3830 | block->version->funcs->name); |
3831 | return r; |
3832 | } |
3833 | block->status.hw = true; |
3834 | } |
3835 | } |
3836 | |
3837 | return 0; |
3838 | } |
3839 | |
3840 | static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev) |
3841 | { |
3842 | struct amdgpu_ip_block *block; |
3843 | int i, r = 0; |
3844 | |
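     | 	/* the remaining blocks are re-initialized in this fixed order after the early SR-IOV re-init */ |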
3845 | static enum amd_ip_block_type ip_order[] = { |
3846 | AMD_IP_BLOCK_TYPE_SMC, |
3847 | AMD_IP_BLOCK_TYPE_DCE, |
3848 | AMD_IP_BLOCK_TYPE_GFX, |
3849 | AMD_IP_BLOCK_TYPE_SDMA, |
3850 | AMD_IP_BLOCK_TYPE_MES, |
3851 | AMD_IP_BLOCK_TYPE_UVD, |
3852 | AMD_IP_BLOCK_TYPE_VCE, |
3853 | AMD_IP_BLOCK_TYPE_VCN, |
3854 | AMD_IP_BLOCK_TYPE_JPEG |
3855 | }; |
3856 | |
3857 | for (i = 0; i < ARRAY_SIZE(ip_order); i++) { |
3858 | 		block = amdgpu_device_ip_get_ip_block(adev, ip_order[i]); |
3859 | |
3860 | if (!block) |
3861 | continue; |
3862 | |
3863 | if (block->status.valid && !block->status.hw) { |
3864 | if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) { |
3865 | 				r = amdgpu_ip_block_resume(block); |
3866 | } else { |
3867 | r = block->version->funcs->hw_init(block); |
3868 | } |
3869 | |
3870 | if (r) { |
3871 | dev_err(adev->dev, "RE-INIT-late: %s failed\n", |
3872 | block->version->funcs->name); |
3873 | break; |
3874 | } |
3875 | block->status.hw = true; |
3876 | } |
3877 | } |
3878 | |
3879 | return r; |
3880 | } |
3881 | |
3882 | /** |
3883 | * amdgpu_device_ip_resume_phase1 - run resume for hardware IPs |
3884 | * |
3885 | * @adev: amdgpu_device pointer |
3886 | * |
3887 | * First resume function for hardware IPs. The list of all the hardware |
3888 | * IPs that make up the asic is walked and the resume callbacks are run for |
3889 | * COMMON, GMC, and IH. resume puts the hardware into a functional state |
3890 | * after a suspend and updates the software state as necessary. This |
3891 | * function is also used for restoring the GPU after a GPU reset. |
3892 | * Returns 0 on success, negative error code on failure. |
3893 | */ |
3894 | static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev) |
3895 | { |
3896 | int i, r; |
3897 | |
3898 | for (i = 0; i < adev->num_ip_blocks; i++) { |
3899 | if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) |
3900 | continue; |
3901 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || |
3902 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || |
3903 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || |
3904 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) { |
3905 | |
3906 | 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]); |
3907 | if (r) |
3908 | return r; |
3909 | } |
3910 | } |
3911 | |
3912 | return 0; |
3913 | } |
3914 | |
3915 | /** |
3916 | * amdgpu_device_ip_resume_phase2 - run resume for hardware IPs |
3917 | * |
3918 | * @adev: amdgpu_device pointer |
3919 | * |
3920 | * Second resume function for hardware IPs. The list of all the hardware |
3921 | * IPs that make up the asic is walked and the resume callbacks are run for |
3922 | * all blocks except COMMON, GMC, and IH. resume puts the hardware into a |
3923 | * functional state after a suspend and updates the software state as |
3924 | * necessary. This function is also used for restoring the GPU after a GPU |
3925 | * reset. |
3926 | * Returns 0 on success, negative error code on failure. |
3927 | */ |
3928 | static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) |
3929 | { |
3930 | int i, r; |
3931 | |
3932 | for (i = 0; i < adev->num_ip_blocks; i++) { |
3933 | if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) |
3934 | continue; |
3935 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || |
3936 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || |
3937 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || |
3938 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE || |
3939 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) |
3940 | continue; |
3941 | 		r = amdgpu_ip_block_resume(&adev->ip_blocks[i]); |
3942 | if (r) |
3943 | return r; |
3944 | } |
3945 | |
3946 | return 0; |
3947 | } |
3948 | |
3949 | /** |
3950 | * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs |
3951 | * |
3952 | * @adev: amdgpu_device pointer |
3953 | * |
3954 | * Third resume function for hardware IPs. The list of all the hardware |
3955 | * IPs that make up the asic is walked and the resume callbacks are run for |
3956 | * all DCE. resume puts the hardware into a functional state after a suspend |
3957 | * and updates the software state as necessary. This function is also used |
3958 | * for restoring the GPU after a GPU reset. |
3959 | * |
3960 | * Returns 0 on success, negative error code on failure. |
3961 | */ |
3962 | static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev) |
3963 | { |
3964 | int i, r; |
3965 | |
3966 | for (i = 0; i < adev->num_ip_blocks; i++) { |
3967 | if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw) |
3968 | continue; |
3969 | if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { |
3970 | 			r = amdgpu_ip_block_resume(&adev->ip_blocks[i]); |
3971 | if (r) |
3972 | return r; |
3973 | } |
3974 | } |
3975 | |
3976 | return 0; |
3977 | } |
3978 | |
3979 | /** |
3980 | * amdgpu_device_ip_resume - run resume for hardware IPs |
3981 | * |
3982 | * @adev: amdgpu_device pointer |
3983 | * |
3984 | * Main resume function for hardware IPs. The hardware IPs |
3985 | * are split into two resume functions because they are |
3986 |  * are split into multiple resume phases because they are |
3987 |  * also used in recovering from a GPU reset and some additional |
3988 |  * steps need to be taken between them. In this case (S3/S4) they are |
3989 | * Returns 0 on success, negative error code on failure. |
3990 | */ |
3991 | static int amdgpu_device_ip_resume(struct amdgpu_device *adev) |
3992 | { |
3993 | int r; |
3994 | |
3995 | r = amdgpu_device_ip_resume_phase1(adev); |
3996 | if (r) |
3997 | return r; |
3998 | |
3999 | r = amdgpu_device_fw_loading(adev); |
4000 | if (r) |
4001 | return r; |
4002 | |
4003 | r = amdgpu_device_ip_resume_phase2(adev); |
4004 | |
4005 | if (adev->mman.buffer_funcs_ring->sched.ready) |
4006 | 		amdgpu_ttm_set_buffer_funcs_status(adev, true); |
4007 | |
4008 | if (r) |
4009 | return r; |
4010 | |
4011 | amdgpu_fence_driver_hw_init(adev); |
4012 | |
4013 | r = amdgpu_device_ip_resume_phase3(adev); |
4014 | |
4015 | return r; |
4016 | } |
4017 | |
4018 | /** |
4019 | * amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV |
4020 | * |
4021 | * @adev: amdgpu_device pointer |
4022 | * |
4023 | * Query the VBIOS data tables to determine if the board supports SR-IOV. |
4024 | */ |
4025 | static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev) |
4026 | { |
4027 | if (amdgpu_sriov_vf(adev)) { |
4028 | if (adev->is_atom_fw) { |
4029 | if (amdgpu_atomfirmware_gpu_virtualization_supported(adev)) |
4030 | adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; |
4031 | } else { |
4032 | if (amdgpu_atombios_has_gpu_virtualization_table(adev)) |
4033 | adev->virt.caps |= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS; |
4034 | } |
4035 | |
4036 | if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS)) |
4037 | 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_NO_VBIOS, 0, 0); |
4038 | } |
4039 | } |
4040 | |
4041 | /** |
4042 | * amdgpu_device_asic_has_dc_support - determine if DC supports the asic |
4043 | * |
4044 | * @asic_type: AMD asic type |
4045 | * |
4046 |  * Check if there is DC (new modesetting infrastructure) support for an asic. |
4047 | * returns true if DC has support, false if not. |
4048 | */ |
4049 | bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) |
4050 | { |
4051 | switch (asic_type) { |
4052 | #ifdef CONFIG_DRM_AMDGPU_SI |
4053 | case CHIP_HAINAN: |
4054 | #endif |
4055 | case CHIP_TOPAZ: |
4056 | /* chips with no display hardware */ |
4057 | return false; |
4058 | #if defined(CONFIG_DRM_AMD_DC) |
4059 | case CHIP_TAHITI: |
4060 | case CHIP_PITCAIRN: |
4061 | case CHIP_VERDE: |
4062 | case CHIP_OLAND: |
4063 | /* |
4064 | * We have systems in the wild with these ASICs that require |
4065 | * LVDS and VGA support which is not supported with DC. |
4066 | * |
4067 | * Fallback to the non-DC driver here by default so as not to |
4068 | * cause regressions. |
4069 | */ |
4070 | #if defined(CONFIG_DRM_AMD_DC_SI) |
4071 | return amdgpu_dc > 0; |
4072 | #else |
4073 | return false; |
4074 | #endif |
4075 | case CHIP_BONAIRE: |
4076 | case CHIP_KAVERI: |
4077 | case CHIP_KABINI: |
4078 | case CHIP_MULLINS: |
4079 | /* |
4080 | * We have systems in the wild with these ASICs that require |
4081 | * VGA support which is not supported with DC. |
4082 | * |
4083 | * Fallback to the non-DC driver here by default so as not to |
4084 | * cause regressions. |
4085 | */ |
4086 | return amdgpu_dc > 0; |
4087 | default: |
4088 | return amdgpu_dc != 0; |
4089 | #else |
4090 | default: |
4091 | if (amdgpu_dc > 0) |
4092 | DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n"); |
4093 | return false; |
4094 | #endif |
4095 | } |
4096 | } |
4097 | |
4098 | /** |
4099 | * amdgpu_device_has_dc_support - check if dc is supported |
4100 | * |
4101 | * @adev: amdgpu_device pointer |
4102 | * |
4103 | * Returns true for supported, false for not supported |
4104 | */ |
4105 | bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) |
4106 | { |
4107 | if (adev->enable_virtual_display || |
4108 | (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) |
4109 | return false; |
4110 | |
4111 | 	return amdgpu_device_asic_has_dc_support(adev->asic_type); |
4112 | } |
4113 | |
4114 | static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) |
4115 | { |
4116 | struct amdgpu_device *adev = |
4117 | container_of(__work, struct amdgpu_device, xgmi_reset_work); |
4118 | struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); |
4119 | |
4120 | /* It's a bug to not have a hive within this function */ |
4121 | if (WARN_ON(!hive)) |
4122 | return; |
4123 | |
4124 | /* |
4125 | * Use task barrier to synchronize all xgmi reset works across the |
4126 | * hive. task_barrier_enter and task_barrier_exit will block |
4127 | * until all the threads running the xgmi reset works reach |
4128 | * those points. task_barrier_full will do both blocks. |
4129 | */ |
4130 | if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) { |
4131 | |
4132 | 		task_barrier_enter(&hive->tb); |
4133 | 		adev->asic_reset_res = amdgpu_device_baco_enter(adev_to_drm(adev)); |
4134 |  |
4135 | 		if (adev->asic_reset_res) |
4136 | 			goto fail; |
4137 |  |
4138 | 		task_barrier_exit(&hive->tb); |
4139 | 		adev->asic_reset_res = amdgpu_device_baco_exit(adev_to_drm(adev)); |
4140 |  |
4141 | 		if (adev->asic_reset_res) |
4142 | 			goto fail; |
4143 |  |
4144 | 		amdgpu_ras_reset_error_count(adev, AMDGPU_RAS_BLOCK__MMHUB); |
4145 | 	} else { |
4146 |  |
4147 | 		task_barrier_full(&hive->tb); |
4148 | adev->asic_reset_res = amdgpu_asic_reset(adev); |
4149 | } |
4150 | |
4151 | fail: |
4152 | if (adev->asic_reset_res) |
4153 | DRM_WARN("ASIC reset failed with error, %d for drm dev, %s", |
4154 | adev->asic_reset_res, adev_to_drm(adev)->unique); |
4155 | amdgpu_put_xgmi_hive(hive); |
4156 | } |
4157 | |
4158 | static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) |
4159 | { |
4160 | char *input = amdgpu_lockup_timeout; |
4161 | char *timeout_setting = NULL; |
4162 | int index = 0; |
4163 | long timeout; |
4164 | int ret = 0; |
4165 | |
4166 | /* |
4167 | 	 * By default the timeout for non-compute jobs is 10000 ms |
4168 | 	 * and 60000 ms for compute jobs. |
4169 | 	 * In SR-IOV or passthrough mode, the timeout for compute |
4170 | 	 * jobs is 60000 ms by default. |
4171 | */ |
4172 | 	adev->gfx_timeout = msecs_to_jiffies(10000); |
4173 | 	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; |
4174 | 	if (amdgpu_sriov_vf(adev)) |
4175 | 		adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ? |
4176 | 					msecs_to_jiffies(60000) : msecs_to_jiffies(10000); |
4177 | 	else |
4178 | 		adev->compute_timeout = msecs_to_jiffies(60000); |
4179 |  |
4180 | 	if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { |
4181 | 		while ((timeout_setting = strsep(&input, ",")) && |
4182 | 		       strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { |
4183 | 			ret = kstrtol(timeout_setting, 0, &timeout); |
4184 | if (ret) |
4185 | return ret; |
4186 | |
4187 | if (timeout == 0) { |
4188 | index++; |
4189 | continue; |
4190 | } else if (timeout < 0) { |
4191 | timeout = MAX_SCHEDULE_TIMEOUT; |
4192 | dev_warn(adev->dev, "lockup timeout disabled"); |
4193 | add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK); |
4194 | } else { |
4195 | 				timeout = msecs_to_jiffies(timeout); |
4196 | } |
4197 | |
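     | 			/* positional mapping of the comma-separated values: gfx, compute, sdma, video */ |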
4198 | switch (index++) { |
4199 | case 0: |
4200 | adev->gfx_timeout = timeout; |
4201 | break; |
4202 | case 1: |
4203 | adev->compute_timeout = timeout; |
4204 | break; |
4205 | case 2: |
4206 | adev->sdma_timeout = timeout; |
4207 | break; |
4208 | case 3: |
4209 | adev->video_timeout = timeout; |
4210 | break; |
4211 | default: |
4212 | break; |
4213 | } |
4214 | } |
4215 | /* |
4216 | * There is only one value specified and |
4217 | * it should apply to all non-compute jobs. |
4218 | */ |
4219 | if (index == 1) { |
4220 | adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout; |
4221 | if (amdgpu_sriov_vf(adev) || amdgpu_passthrough(adev)) |
4222 | adev->compute_timeout = adev->gfx_timeout; |
4223 | } |
4224 | } |
4225 | |
4226 | return ret; |
4227 | } |
4228 | |
4229 | /** |
4230 | * amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU |
4231 | * |
4232 | * @adev: amdgpu_device pointer |
4233 | * |
4234 |  * RAM is direct mapped to the GPU if the IOMMU is not enabled or is in passthrough mode |
4235 | */ |
4236 | static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev) |
4237 | { |
4238 | struct iommu_domain *domain; |
4239 | |
4240 | 	domain = iommu_get_domain_for_dev(adev->dev); |
4241 | if (!domain || domain->type == IOMMU_DOMAIN_IDENTITY) |
4242 | adev->ram_is_direct_mapped = true; |
4243 | } |
4244 | |
4245 | #if defined(CONFIG_HSA_AMD_P2P) |
4246 | /** |
4247 | * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled. |
4248 | * |
4249 | * @adev: amdgpu_device pointer |
4250 | * |
4251 |  * Returns true if the IOMMU is remapping BAR addresses (DMA remapping enabled). |
4252 | */ |
4253 | static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev) |
4254 | { |
4255 | struct iommu_domain *domain; |
4256 | |
4257 | 	domain = iommu_get_domain_for_dev(adev->dev); |
4258 | if (domain && (domain->type == IOMMU_DOMAIN_DMA || |
4259 | domain->type == IOMMU_DOMAIN_DMA_FQ)) |
4260 | return true; |
4261 | |
4262 | return false; |
4263 | } |
4264 | #endif |
4265 | |
4266 | static void amdgpu_device_set_mcbp(struct amdgpu_device *adev) |
4267 | { |
4268 | if (amdgpu_mcbp == 1) |
4269 | adev->gfx.mcbp = true; |
4270 | else if (amdgpu_mcbp == 0) |
4271 | adev->gfx.mcbp = false; |
4272 | |
4273 | if (amdgpu_sriov_vf(adev)) |
4274 | adev->gfx.mcbp = true; |
4275 | |
4276 | if (adev->gfx.mcbp) |
4277 | DRM_INFO("MCBP is enabled\n"); |
4278 | } |
4279 | |
4280 | /** |
4281 | * amdgpu_device_init - initialize the driver |
4282 | * |
4283 | * @adev: amdgpu_device pointer |
4284 | * @flags: driver flags |
4285 | * |
4286 | * Initializes the driver info and hw (all asics). |
4287 | * Returns 0 for success or an error on failure. |
4288 | * Called at driver startup. |
4289 | */ |
4290 | int amdgpu_device_init(struct amdgpu_device *adev, |
4291 | uint32_t flags) |
4292 | { |
4293 | struct drm_device *ddev = adev_to_drm(adev); |
4294 | struct pci_dev *pdev = adev->pdev; |
4295 | int r, i; |
4296 | bool px = false; |
4297 | u32 max_MBps; |
4298 | int tmp; |
4299 | |
4300 | adev->shutdown = false; |
4301 | adev->flags = flags; |
4302 | |
4303 | if (amdgpu_force_asic_type >= 0 && amdgpu_force_asic_type < CHIP_LAST) |
4304 | adev->asic_type = amdgpu_force_asic_type; |
4305 | else |
4306 | adev->asic_type = flags & AMD_ASIC_MASK; |
4307 | |
4308 | adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT; |
4309 | if (amdgpu_emu_mode == 1) |
4310 | adev->usec_timeout *= 10; |
4311 | adev->gmc.gart_size = 512 * 1024 * 1024; |
4312 | adev->accel_working = false; |
4313 | adev->num_rings = 0; |
4314 | RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub()); |
4315 | adev->mman.buffer_funcs = NULL; |
4316 | adev->mman.buffer_funcs_ring = NULL; |
4317 | adev->vm_manager.vm_pte_funcs = NULL; |
4318 | adev->vm_manager.vm_pte_num_scheds = 0; |
4319 | adev->gmc.gmc_funcs = NULL; |
4320 | adev->harvest_ip_mask = 0x0; |
4321 | adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); |
4322 | 	bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); |
4323 | |
4324 | adev->smc_rreg = &amdgpu_invalid_rreg; |
4325 | adev->smc_wreg = &amdgpu_invalid_wreg; |
4326 | adev->pcie_rreg = &amdgpu_invalid_rreg; |
4327 | adev->pcie_wreg = &amdgpu_invalid_wreg; |
4328 | adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext; |
4329 | adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext; |
4330 | adev->pciep_rreg = &amdgpu_invalid_rreg; |
4331 | adev->pciep_wreg = &amdgpu_invalid_wreg; |
4332 | adev->pcie_rreg64 = &amdgpu_invalid_rreg64; |
4333 | adev->pcie_wreg64 = &amdgpu_invalid_wreg64; |
4334 | adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext; |
4335 | adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext; |
4336 | adev->uvd_ctx_rreg = &amdgpu_invalid_rreg; |
4337 | adev->uvd_ctx_wreg = &amdgpu_invalid_wreg; |
4338 | adev->didt_rreg = &amdgpu_invalid_rreg; |
4339 | adev->didt_wreg = &amdgpu_invalid_wreg; |
4340 | adev->gc_cac_rreg = &amdgpu_invalid_rreg; |
4341 | adev->gc_cac_wreg = &amdgpu_invalid_wreg; |
4342 | adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg; |
4343 | adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg; |
4344 | |
4345 | DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n", |
4346 | amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device, |
4347 | pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision); |
4348 | |
4349 | /* mutex initialization are all done here so we |
4350 | * can recall function without having locking issues |
4351 | */ |
4352 | mutex_init(&adev->firmware.mutex); |
4353 | mutex_init(&adev->pm.mutex); |
4354 | mutex_init(&adev->gfx.gpu_clock_mutex); |
4355 | mutex_init(&adev->srbm_mutex); |
4356 | mutex_init(&adev->gfx.pipe_reserve_mutex); |
4357 | mutex_init(&adev->gfx.gfx_off_mutex); |
4358 | mutex_init(&adev->gfx.partition_mutex); |
4359 | mutex_init(&adev->grbm_idx_mutex); |
4360 | mutex_init(&adev->mn_lock); |
4361 | mutex_init(&adev->virt.vf_errors.lock); |
4362 | hash_init(adev->mn_hash); |
4363 | mutex_init(&adev->psp.mutex); |
4364 | mutex_init(&adev->notifier_lock); |
4365 | mutex_init(&adev->pm.stable_pstate_ctx_lock); |
4366 | mutex_init(&adev->benchmark_mutex); |
4367 | mutex_init(&adev->gfx.reset_sem_mutex); |
4368 | /* Initialize the mutex for cleaner shader isolation between GFX and compute processes */ |
4369 | mutex_init(&adev->enforce_isolation_mutex); |
4370 | for (i = 0; i < MAX_XCP; ++i) { |
4371 | adev->isolation[i].spearhead = dma_fence_get_stub(); |
4372 | 		amdgpu_sync_create(&adev->isolation[i].active); |
4373 | 		amdgpu_sync_create(&adev->isolation[i].prev); |
4374 | } |
4375 | mutex_init(&adev->gfx.userq_sch_mutex); |
4376 | mutex_init(&adev->gfx.workload_profile_mutex); |
4377 | mutex_init(&adev->vcn.workload_profile_mutex); |
4378 | mutex_init(&adev->userq_mutex); |
4379 | |
4380 | amdgpu_device_init_apu_flags(adev); |
4381 | |
4382 | r = amdgpu_device_check_arguments(adev); |
4383 | if (r) |
4384 | return r; |
4385 | |
4386 | spin_lock_init(&adev->mmio_idx_lock); |
4387 | spin_lock_init(&adev->smc_idx_lock); |
4388 | spin_lock_init(&adev->pcie_idx_lock); |
4389 | spin_lock_init(&adev->uvd_ctx_idx_lock); |
4390 | spin_lock_init(&adev->didt_idx_lock); |
4391 | spin_lock_init(&adev->gc_cac_idx_lock); |
4392 | spin_lock_init(&adev->se_cac_idx_lock); |
4393 | spin_lock_init(&adev->audio_endpt_idx_lock); |
4394 | spin_lock_init(&adev->mm_stats.lock); |
4395 | spin_lock_init(&adev->virt.rlcg_reg_lock); |
4396 | spin_lock_init(&adev->wb.lock); |
4397 | |
4398 | 	xa_init_flags(&adev->userq_xa, XA_FLAGS_LOCK_IRQ); |
4399 | |
4400 | 	INIT_LIST_HEAD(&adev->reset_list); |
4401 |  |
4402 | 	INIT_LIST_HEAD(&adev->ras_list); |
4403 |  |
4404 | 	INIT_LIST_HEAD(&adev->pm.od_kobj_list); |
4405 |  |
4406 | 	INIT_LIST_HEAD(&adev->userq_mgr_list); |
4407 | |
4408 | INIT_DELAYED_WORK(&adev->delayed_init_work, |
4409 | amdgpu_device_delayed_init_work_handler); |
4410 | INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, |
4411 | amdgpu_device_delay_enable_gfx_off); |
4412 | /* |
4413 | * Initialize the enforce_isolation work structures for each XCP |
4414 | * partition. This work handler is responsible for enforcing shader |
4415 | * isolation on AMD GPUs. It counts the number of emitted fences for |
4416 | * each GFX and compute ring. If there are any fences, it schedules |
4417 | * the `enforce_isolation_work` to be run after a delay. If there are |
4418 | * no fences, it signals the Kernel Fusion Driver (KFD) to resume the |
4419 | * runqueue. |
4420 | */ |
4421 | for (i = 0; i < MAX_XCP; i++) { |
4422 | INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work, |
4423 | amdgpu_gfx_enforce_isolation_handler); |
4424 | adev->gfx.enforce_isolation[i].adev = adev; |
4425 | adev->gfx.enforce_isolation[i].xcp_id = i; |
4426 | } |
4427 | |
4428 | INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); |
4429 | |
4430 | adev->gfx.gfx_off_req_count = 1; |
4431 | adev->gfx.gfx_off_residency = 0; |
4432 | adev->gfx.gfx_off_entrycount = 0; |
4433 | adev->pm.ac_power = power_supply_is_system_supplied() > 0; |
4434 | |
4435 | 	atomic_set(&adev->throttling_logging_enabled, 1); |
4436 | /* |
4437 | * If throttling continues, logging will be performed every minute |
4438 | * to avoid log flooding. "-1" is subtracted since the thermal |
4439 | * throttling interrupt comes every second. Thus, the total logging |
4440 | 	 * interval is 59 seconds (ratelimited printk interval) + 1 (waiting |
4441 | * for throttling interrupt) = 60 seconds. |
4442 | */ |
4443 | 	ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1); |
4444 |  |
4445 | 	ratelimit_set_flags(&adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE); |
4446 | |
4447 | /* Registers mapping */ |
4448 | /* TODO: block userspace mapping of io register */ |
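     | 	/* BONAIRE and newer expose the MMIO registers via BAR 5; older ASICs use BAR 2 */ |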
4449 | if (adev->asic_type >= CHIP_BONAIRE) { |
4450 | adev->rmmio_base = pci_resource_start(adev->pdev, 5); |
4451 | adev->rmmio_size = pci_resource_len(adev->pdev, 5); |
4452 | } else { |
4453 | adev->rmmio_base = pci_resource_start(adev->pdev, 2); |
4454 | adev->rmmio_size = pci_resource_len(adev->pdev, 2); |
4455 | } |
4456 | |
4457 | for (i = 0; i < AMD_IP_BLOCK_TYPE_NUM; i++) |
4458 | 		atomic_set(&adev->pm.pwr_state[i], POWER_STATE_UNKNOWN); |
4459 |  |
4460 | 	adev->rmmio = ioremap(adev->rmmio_base, adev->rmmio_size); |
4461 | if (!adev->rmmio) |
4462 | return -ENOMEM; |
4463 | |
4464 | DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); |
4465 | DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size); |
4466 | |
4467 | /* |
4468 | * Reset domain needs to be present early, before XGMI hive discovered |
4469 | * (if any) and initialized to use reset sem and in_gpu reset flag |
4470 | * early on during init and before calling to RREG32. |
4471 | */ |
4472 | 	adev->reset_domain = amdgpu_reset_create_reset_domain(SINGLE_DEVICE, "amdgpu-reset-dev"); |
4473 | if (!adev->reset_domain) |
4474 | return -ENOMEM; |
4475 | |
4476 | /* detect hw virtualization here */ |
4477 | amdgpu_virt_init(adev); |
4478 | |
4479 | amdgpu_device_get_pcie_info(adev); |
4480 | |
4481 | r = amdgpu_device_get_job_timeout_settings(adev); |
4482 | if (r) { |
4483 | dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n"); |
4484 | return r; |
4485 | } |
4486 | |
4487 | amdgpu_device_set_mcbp(adev); |
4488 | |
4489 | /* |
4490 | * By default, use default mode where all blocks are expected to be |
4491 | * initialized. At present a 'swinit' of blocks is required to be |
4492 | * completed before the need for a different level is detected. |
4493 | */ |
4494 | 	amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT); |
4495 | /* early init functions */ |
4496 | r = amdgpu_device_ip_early_init(adev); |
4497 | if (r) |
4498 | return r; |
4499 | |
4500 | /* |
4501 | * No need to remove conflicting FBs for non-display class devices. |
4502 | 	 * This prevents the sysfb from being freed accidentally. |
4503 | */ |
4504 | if ((pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA || |
4505 | (pdev->class >> 8) == PCI_CLASS_DISPLAY_OTHER) { |
4506 | /* Get rid of things like offb */ |
4507 | 		r = aperture_remove_conflicting_pci_devices(adev->pdev, amdgpu_kms_driver.name); |
4508 | if (r) |
4509 | return r; |
4510 | } |
4511 | |
4512 | /* Enable TMZ based on IP_VERSION */ |
4513 | amdgpu_gmc_tmz_set(adev); |
4514 | |
4515 | if (amdgpu_sriov_vf(adev) && |
4516 | 	    amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(10, 3, 0)) |
4517 | /* VF MMIO access (except mailbox range) from CPU |
4518 | * will be blocked during sriov runtime |
4519 | */ |
4520 | adev->virt.caps |= AMDGPU_VF_MMIO_ACCESS_PROTECT; |
4521 | |
4522 | amdgpu_gmc_noretry_set(adev); |
4523 | /* Need to get xgmi info early to decide the reset behavior*/ |
4524 | if (adev->gmc.xgmi.supported) { |
4525 | r = adev->gfxhub.funcs->get_xgmi_info(adev); |
4526 | if (r) |
4527 | return r; |
4528 | } |
4529 | |
4530 | /* enable PCIE atomic ops */ |
4531 | if (amdgpu_sriov_vf(adev)) { |
4532 | if (adev->virt.fw_reserve.p_pf2vf) |
4533 | adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *) |
4534 | adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags == |
4535 | (PCI_EXP_DEVCAP2_ATOMIC_COMP32 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); |
4536 | 		/* APUs with gfx9 onwards don't rely on PCIe atomics; their |
4537 | 		 * internal path natively supports atomics, so set have_atomics_support to true. |
4538 | 		 */ |
4539 | 	} else if ((adev->flags & AMD_IS_APU) && |
4540 | 		   (amdgpu_ip_version(adev, GC_HWIP, 0) > |
4541 | IP_VERSION(9, 0, 0))) { |
4542 | adev->have_atomics_support = true; |
4543 | } else { |
4544 | adev->have_atomics_support = |
4545 | 			!pci_enable_atomic_ops_to_root(adev->pdev, |
4546 | PCI_EXP_DEVCAP2_ATOMIC_COMP32 | |
4547 | PCI_EXP_DEVCAP2_ATOMIC_COMP64); |
4548 | } |
4549 | |
4550 | if (!adev->have_atomics_support) |
4551 | dev_info(adev->dev, "PCIE atomic ops is not supported\n"); |
4552 | |
4553 | /* doorbell bar mapping and doorbell index init*/ |
4554 | amdgpu_doorbell_init(adev); |
4555 | |
4556 | if (amdgpu_emu_mode == 1) { |
4557 | /* post the asic on emulation mode */ |
4558 | emu_soc_asic_init(adev); |
4559 | goto fence_driver_init; |
4560 | } |
4561 | |
4562 | amdgpu_reset_init(adev); |
4563 | |
4564 | /* detect if we are with an SRIOV vbios */ |
4565 | if (adev->bios) |
4566 | amdgpu_device_detect_sriov_bios(adev); |
4567 | |
4568 | /* check if we need to reset the asic |
4569 | * E.g., driver was not cleanly unloaded previously, etc. |
4570 | */ |
4571 | if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) { |
4572 | if (adev->gmc.xgmi.num_physical_nodes) { |
4573 | dev_info(adev->dev, "Pending hive reset.\n"); |
4574 | 			amdgpu_set_init_level(adev, |
4575 | 					AMDGPU_INIT_LEVEL_MINIMAL_XGMI); |
4576 | 		} else if (amdgpu_ip_version(adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 10) && |
4577 | !amdgpu_device_has_display_hardware(adev)) { |
4578 | r = psp_gpu_reset(adev); |
4579 | } else { |
4580 | tmp = amdgpu_reset_method; |
4581 | /* It should do a default reset when loading or reloading the driver, |
4582 | * regardless of the module parameter reset_method. |
4583 | */ |
4584 | amdgpu_reset_method = AMD_RESET_METHOD_NONE; |
4585 | r = amdgpu_asic_reset(adev); |
4586 | amdgpu_reset_method = tmp; |
4587 | } |
4588 | |
4589 | if (r) { |
4590 | dev_err(adev->dev, "asic reset on init failed\n"); |
4591 | goto failed; |
4592 | } |
4593 | } |
4594 | |
4595 | /* Post card if necessary */ |
4596 | if (amdgpu_device_need_post(adev)) { |
4597 | if (!adev->bios) { |
4598 | dev_err(adev->dev, "no vBIOS found\n"); |
4599 | r = -EINVAL; |
4600 | goto failed; |
4601 | } |
4602 | DRM_INFO("GPU posting now...\n"); |
4603 | r = amdgpu_device_asic_init(adev); |
4604 | if (r) { |
4605 | dev_err(adev->dev, "gpu post error!\n"); |
4606 | goto failed; |
4607 | } |
4608 | } |
4609 | |
4610 | if (adev->bios) { |
4611 | if (adev->is_atom_fw) { |
4612 | /* Initialize clocks */ |
4613 | r = amdgpu_atomfirmware_get_clock_info(adev); |
4614 | if (r) { |
4615 | dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n"); |
4616 | 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); |
4617 | goto failed; |
4618 | } |
4619 | } else { |
4620 | /* Initialize clocks */ |
4621 | r = amdgpu_atombios_get_clock_info(adev); |
4622 | if (r) { |
4623 | dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n"); |
4624 | 				amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, 0, 0); |
4625 | goto failed; |
4626 | } |
4627 | /* init i2c buses */ |
4628 | amdgpu_i2c_init(adev); |
4629 | } |
4630 | } |
4631 | |
4632 | fence_driver_init: |
4633 | /* Fence driver */ |
4634 | r = amdgpu_fence_driver_sw_init(adev); |
4635 | if (r) { |
4636 | dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n"); |
4637 | 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0, 0); |
4638 | goto failed; |
4639 | } |
4640 | |
4641 | /* init the mode config */ |
4642 | 	drm_mode_config_init(adev_to_drm(adev)); |
4643 | |
4644 | r = amdgpu_device_ip_init(adev); |
4645 | if (r) { |
4646 | dev_err(adev->dev, "amdgpu_device_ip_init failed\n"); |
4647 | 		amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0); |
4648 | goto release_ras_con; |
4649 | } |
4650 | |
4651 | amdgpu_fence_driver_hw_init(adev); |
4652 | |
4653 | dev_info(adev->dev, |
4654 | "SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n", |
4655 | adev->gfx.config.max_shader_engines, |
4656 | adev->gfx.config.max_sh_per_se, |
4657 | adev->gfx.config.max_cu_per_sh, |
4658 | adev->gfx.cu_info.number); |
4659 | |
4660 | adev->accel_working = true; |
4661 | |
4662 | amdgpu_vm_check_compute_bug(adev); |
4663 | |
4664 | /* Initialize the buffer migration limit. */ |
4665 | if (amdgpu_moverate >= 0) |
4666 | max_MBps = amdgpu_moverate; |
4667 | else |
4668 | max_MBps = 8; /* Allow 8 MB/s. */ |
4669 | /* Get a log2 for easy divisions. */ |
4670 | adev->mm_stats.log2_max_MBps = ilog2(max(1u, max_MBps)); |
4671 | |
4672 | /* |
4673 | * Register gpu instance before amdgpu_device_enable_mgpu_fan_boost. |
4674 | 	 * Otherwise the mgpu fan boost feature will be skipped because the |
4675 | 	 * gpu instance count would be too low. |
4676 | */ |
4677 | amdgpu_register_gpu_instance(adev); |
4678 | |
4679 | /* enable clockgating, etc. after ib tests, etc. since some blocks require |
4680 | * explicit gating rather than handling it automatically. |
4681 | */ |
4682 | if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) { |
4683 | r = amdgpu_device_ip_late_init(adev); |
4684 | if (r) { |
4685 | dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n"); |
4686 | 			amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, 0, r); |
4687 | goto release_ras_con; |
4688 | } |
4689 | /* must succeed. */ |
4690 | amdgpu_ras_resume(adev); |
4691 | 		queue_delayed_work(system_wq, &adev->delayed_init_work, |
4692 | 				   msecs_to_jiffies(AMDGPU_RESUME_MS)); |
4693 | } |
4694 | |
4695 | if (amdgpu_sriov_vf(adev)) { |
4696 | 		amdgpu_virt_release_full_gpu(adev, true); |
4697 | 		flush_delayed_work(&adev->delayed_init_work); |
4698 | } |
4699 | |
4700 | /* |
4701 | 	 * Register these sysfs interfaces after late_init, since some of the |
4702 | 	 * operations performed in late_init might affect how the sysfs |
4703 | 	 * interfaces are created. |
4704 | */ |
4705 | r = amdgpu_atombios_sysfs_init(adev); |
4706 | if (r) |
4707 | drm_err(&adev->ddev, |
4708 | "registering atombios sysfs failed (%d).\n", r); |
4709 | |
4710 | r = amdgpu_pm_sysfs_init(adev); |
4711 | if (r) |
4712 | DRM_ERROR("registering pm sysfs failed (%d).\n", r); |
4713 | |
4714 | r = amdgpu_ucode_sysfs_init(adev); |
4715 | if (r) { |
4716 | adev->ucode_sysfs_en = false; |
4717 | DRM_ERROR("Creating firmware sysfs failed (%d).\n", r); |
4718 | } else |
4719 | adev->ucode_sysfs_en = true; |
4720 | |
4721 | r = amdgpu_device_attr_sysfs_init(adev); |
4722 | if (r) |
4723 | dev_err(adev->dev, "Could not create amdgpu device attr\n"); |
4724 | |
4725 | 	r = devm_device_add_group(adev->dev, &amdgpu_board_attrs_group); |
4726 | if (r) |
4727 | dev_err(adev->dev, |
4728 | "Could not create amdgpu board attributes\n"); |
4729 | |
4730 | amdgpu_fru_sysfs_init(adev); |
4731 | amdgpu_reg_state_sysfs_init(adev); |
4732 | amdgpu_xcp_sysfs_init(adev); |
4733 | |
4734 | if (IS_ENABLED(CONFIG_PERF_EVENTS)) |
4735 | r = amdgpu_pmu_init(adev); |
4736 | if (r) |
4737 | dev_err(adev->dev, "amdgpu_pmu_init failed\n"); |
4738 | |
4739 | /* Have stored pci confspace at hand for restore in sudden PCI error */ |
4740 | 	if (amdgpu_device_cache_pci_state(adev->pdev)) |
4741 | 		pci_restore_state(pdev); |
4742 | |
4743 | /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ |
4744 | /* this will fail for cards that aren't VGA class devices, just |
4745 | * ignore it |
4746 | */ |
4747 | if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) |
4748 | 		vga_client_register(adev->pdev, amdgpu_device_vga_set_decode); |
4749 | |
4750 | 	px = amdgpu_device_supports_px(ddev); |
4751 |  |
4752 | 	if (px || (!dev_is_removable(&adev->pdev->dev) && |
4753 | 		   apple_gmux_detect(NULL, NULL))) |
4754 | 		vga_switcheroo_register_client(adev->pdev, |
4755 | 					       &amdgpu_switcheroo_ops, px); |
4756 |  |
4757 | 	if (px) |
4758 | 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain); |
4759 | |
4760 | if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI) |
4761 | amdgpu_xgmi_reset_on_init(adev); |
4762 | |
4763 | amdgpu_device_check_iommu_direct_map(adev); |
4764 | |
4765 | adev->pm_nb.notifier_call = amdgpu_device_pm_notifier; |
4766 | 	r = register_pm_notifier(&adev->pm_nb); |
4767 | if (r) |
4768 | goto failed; |
4769 | |
4770 | return 0; |
4771 | |
4772 | release_ras_con: |
4773 | if (amdgpu_sriov_vf(adev)) |
4774 | 		amdgpu_virt_release_full_gpu(adev, true); |
4775 | |
4776 | /* failed in exclusive mode due to timeout */ |
4777 | if (amdgpu_sriov_vf(adev) && |
4778 | !amdgpu_sriov_runtime(adev) && |
4779 | amdgpu_virt_mmio_blocked(adev) && |
4780 | !amdgpu_virt_wait_reset(adev)) { |
4781 | dev_err(adev->dev, "VF exclusive mode timeout\n"); |
4782 | /* Don't send request since VF is inactive. */ |
4783 | adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; |
4784 | adev->virt.ops = NULL; |
4785 | r = -EAGAIN; |
4786 | } |
4787 | amdgpu_release_ras_context(adev); |
4788 | |
4789 | failed: |
4790 | amdgpu_vf_error_trans_all(adev); |
4791 | |
4792 | return r; |
4793 | } |
4794 | |
4795 | static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev) |
4796 | { |
4797 | |
4798 | /* Clear all CPU mappings pointing to this device */ |
4799 | 	unmap_mapping_range(adev->ddev.anon_inode->i_mapping, 0, 0, 1); |
4800 | |
4801 | /* Unmap all mapped bars - Doorbell, registers and VRAM */ |
4802 | amdgpu_doorbell_fini(adev); |
4803 | |
4804 | 	iounmap(adev->rmmio); |
4805 | 	adev->rmmio = NULL; |
4806 | 	if (adev->mman.aper_base_kaddr) |
4807 | 		iounmap(adev->mman.aper_base_kaddr); |
4808 | adev->mman.aper_base_kaddr = NULL; |
4809 | |
4810 | /* Memory manager related */ |
4811 | if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) { |
4812 | arch_phys_wc_del(handle: adev->gmc.vram_mtrr); |
4813 | arch_io_free_memtype_wc(start: adev->gmc.aper_base, size: adev->gmc.aper_size); |
4814 | } |
4815 | } |
4816 | |
4817 | /** |
4818 | * amdgpu_device_fini_hw - tear down the driver |
4819 | * |
4820 | * @adev: amdgpu_device pointer |
4821 | * |
4822 | * Tear down the driver info (all asics). |
4823 | * Called at driver shutdown. |
4824 | */ |
4825 | void amdgpu_device_fini_hw(struct amdgpu_device *adev) |
4826 | { |
4827 | dev_info(adev->dev, "amdgpu: finishing device.\n"); |
	flush_delayed_work(&adev->delayed_init_work);

	if (adev->mman.initialized)
		drain_workqueue(adev->mman.bdev.wq);
	adev->shutdown = true;

	unregister_pm_notifier(&adev->pm_nb);
4835 | |
	/* make sure the IB tests have finished before entering exclusive mode
	 * to avoid preemption during the IB tests
	 */
	if (amdgpu_sriov_vf(adev)) {
		amdgpu_virt_request_full_gpu(adev, false);
		amdgpu_virt_fini_data_exchange(adev);
	}
4843 | |
4844 | /* disable all interrupts */ |
4845 | amdgpu_irq_disable_all(adev); |
4846 | if (adev->mode_info.mode_config_initialized) { |
4847 | if (!drm_drv_uses_atomic_modeset(dev: adev_to_drm(adev))) |
4848 | drm_helper_force_disable_all(dev: adev_to_drm(adev)); |
4849 | else |
4850 | drm_atomic_helper_shutdown(dev: adev_to_drm(adev)); |
4851 | } |
4852 | amdgpu_fence_driver_hw_fini(adev); |
4853 | |
4854 | if (adev->pm.sysfs_initialized) |
4855 | amdgpu_pm_sysfs_fini(adev); |
4856 | if (adev->ucode_sysfs_en) |
4857 | amdgpu_ucode_sysfs_fini(adev); |
4858 | amdgpu_device_attr_sysfs_fini(adev); |
4859 | amdgpu_fru_sysfs_fini(adev); |
4860 | |
4861 | amdgpu_reg_state_sysfs_fini(adev); |
4862 | amdgpu_xcp_sysfs_fini(adev); |
4863 | |
	/* ras features must be disabled before hw fini */
	amdgpu_ras_pre_fini(adev);

	amdgpu_ttm_set_buffer_funcs_status(adev, false);
4868 | |
4869 | amdgpu_device_ip_fini_early(adev); |
4870 | |
4871 | amdgpu_irq_fini_hw(adev); |
4872 | |
4873 | if (adev->mman.initialized) |
4874 | ttm_device_clear_dma_mappings(bdev: &adev->mman.bdev); |
4875 | |
4876 | amdgpu_gart_dummy_page_fini(adev); |
4877 | |
4878 | if (drm_dev_is_unplugged(dev: adev_to_drm(adev))) |
4879 | amdgpu_device_unmap_mmio(adev); |
4880 | |
4881 | } |
4882 | |
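/**
 * amdgpu_device_fini_sw - tear down the driver software state
 *
 * @adev: amdgpu_device pointer
 *
 * Tear down the software state of the driver (all asics).
 * Called at driver shutdown, after amdgpu_device_fini_hw().
 */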
4883 | void amdgpu_device_fini_sw(struct amdgpu_device *adev) |
4884 | { |
4885 | int i, idx; |
4886 | bool px; |
4887 | |
4888 | amdgpu_device_ip_fini(adev); |
4889 | amdgpu_fence_driver_sw_fini(adev); |
4890 | amdgpu_ucode_release(fw: &adev->firmware.gpu_info_fw); |
4891 | adev->accel_working = false; |
4892 | dma_fence_put(rcu_dereference_protected(adev->gang_submit, true)); |
4893 | for (i = 0; i < MAX_XCP; ++i) { |
4894 | dma_fence_put(fence: adev->isolation[i].spearhead); |
4895 | amdgpu_sync_free(sync: &adev->isolation[i].active); |
4896 | amdgpu_sync_free(sync: &adev->isolation[i].prev); |
4897 | } |
4898 | |
4899 | amdgpu_reset_fini(adev); |
4900 | |
4901 | /* free i2c buses */ |
4902 | amdgpu_i2c_fini(adev); |
4903 | |
4904 | if (adev->bios) { |
4905 | if (amdgpu_emu_mode != 1) |
4906 | amdgpu_atombios_fini(adev); |
4907 | amdgpu_bios_release(adev); |
4908 | } |
4909 | |
4910 | kfree(objp: adev->fru_info); |
4911 | adev->fru_info = NULL; |
4912 | |
4913 | kfree(objp: adev->xcp_mgr); |
4914 | adev->xcp_mgr = NULL; |
4915 | |
4916 | px = amdgpu_device_supports_px(dev: adev_to_drm(adev)); |
4917 | |
4918 | if (px || (!dev_is_removable(dev: &adev->pdev->dev) && |
4919 | apple_gmux_detect(NULL, NULL))) |
4920 | vga_switcheroo_unregister_client(dev: adev->pdev); |
4921 | |
4922 | if (px) |
4923 | vga_switcheroo_fini_domain_pm_ops(dev: adev->dev); |
4924 | |
4925 | if ((adev->pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA) |
4926 | vga_client_unregister(pdev: adev->pdev); |
4927 | |
4928 | if (drm_dev_enter(dev: adev_to_drm(adev), idx: &idx)) { |
4929 | |
4930 | iounmap(addr: adev->rmmio); |
4931 | adev->rmmio = NULL; |
4932 | drm_dev_exit(idx); |
4933 | } |
4934 | |
4935 | if (IS_ENABLED(CONFIG_PERF_EVENTS)) |
4936 | amdgpu_pmu_fini(adev); |
4937 | if (adev->mman.discovery_bin) |
4938 | amdgpu_discovery_fini(adev); |
4939 | |
4940 | amdgpu_reset_put_reset_domain(domain: adev->reset_domain); |
4941 | adev->reset_domain = NULL; |
4942 | |
4943 | kfree(objp: adev->pci_state); |
4944 | |
4945 | } |
4946 | |
4947 | /** |
4948 | * amdgpu_device_evict_resources - evict device resources |
4949 | * @adev: amdgpu device object |
4950 | * |
 * Evicts all ttm device resources (vram BOs, gart table) from the lru list
 * of the vram memory type. Mainly used for evicting device resources
4953 | * at suspend time. |
4954 | * |
4955 | */ |
4956 | static int amdgpu_device_evict_resources(struct amdgpu_device *adev) |
4957 | { |
4958 | int ret; |
4959 | |
4960 | /* No need to evict vram on APUs unless going to S4 */ |
4961 | if (!adev->in_s4 && (adev->flags & AMD_IS_APU)) |
4962 | return 0; |
4963 | |
4964 | ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM); |
4965 | if (ret) |
4966 | DRM_WARN("evicting device resources failed\n"); |
4967 | return ret; |
4968 | } |
4969 | |
4970 | /* |
4971 | * Suspend & resume. |
4972 | */ |
4973 | /** |
4974 | * amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events |
4975 | * @nb: notifier block |
4976 | * @mode: suspend mode |
4977 | * @data: data |
4978 | * |
4979 | * This function is called when the system is about to suspend or hibernate. |
4980 | * It is used to set the appropriate flags so that eviction can be optimized |
4981 | * in the pm prepare callback. |
4982 | */ |
4983 | static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode, |
4984 | void *data) |
4985 | { |
4986 | struct amdgpu_device *adev = container_of(nb, struct amdgpu_device, pm_nb); |
4987 | |
4988 | switch (mode) { |
4989 | case PM_HIBERNATION_PREPARE: |
4990 | adev->in_s4 = true; |
4991 | break; |
4992 | case PM_POST_HIBERNATION: |
4993 | adev->in_s4 = false; |
4994 | break; |
4995 | } |
4996 | |
4997 | return NOTIFY_DONE; |
4998 | } |
4999 | |
5000 | /** |
5001 | * amdgpu_device_prepare - prepare for device suspend |
5002 | * |
5003 | * @dev: drm dev pointer |
5004 | * |
5005 | * Prepare to put the hw in the suspend state (all asics). |
5006 | * Returns 0 for success or an error on failure. |
5007 | * Called at driver suspend. |
5008 | */ |
5009 | int amdgpu_device_prepare(struct drm_device *dev) |
5010 | { |
5011 | struct amdgpu_device *adev = drm_to_adev(ddev: dev); |
5012 | int i, r; |
5013 | |
5014 | if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) |
5015 | return 0; |
5016 | |
5017 | /* Evict the majority of BOs before starting suspend sequence */ |
5018 | r = amdgpu_device_evict_resources(adev); |
5019 | if (r) |
5020 | return r; |
5021 | |
5022 | flush_delayed_work(dwork: &adev->gfx.gfx_off_delay_work); |
5023 | |
5024 | for (i = 0; i < adev->num_ip_blocks; i++) { |
5025 | if (!adev->ip_blocks[i].status.valid) |
5026 | continue; |
5027 | if (!adev->ip_blocks[i].version->funcs->prepare_suspend) |
5028 | continue; |
5029 | r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]); |
5030 | if (r) |
5031 | return r; |
5032 | } |
5033 | |
5034 | return 0; |
5035 | } |
5036 | |
5037 | /** |
5038 | * amdgpu_device_suspend - initiate device suspend |
5039 | * |
5040 | * @dev: drm dev pointer |
5041 | * @notify_clients: notify in-kernel DRM clients |
5042 | * |
5043 | * Puts the hw in the suspend state (all asics). |
5044 | * Returns 0 for success or an error on failure. |
5045 | * Called at driver suspend. |
5046 | */ |
5047 | int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients) |
5048 | { |
5049 | struct amdgpu_device *adev = drm_to_adev(ddev: dev); |
5050 | int r = 0; |
5051 | |
5052 | if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) |
5053 | return 0; |
5054 | |
5055 | adev->in_suspend = true; |
5056 | |
5057 | if (amdgpu_sriov_vf(adev)) { |
5058 | amdgpu_virt_fini_data_exchange(adev); |
5059 | r = amdgpu_virt_request_full_gpu(adev, init: false); |
5060 | if (r) |
5061 | return r; |
5062 | } |
5063 | |
5064 | if (amdgpu_acpi_smart_shift_update(dev, ss_state: AMDGPU_SS_DEV_D3)) |
5065 | DRM_WARN("smart shift update failed\n"); |
5066 | |
5067 | if (notify_clients) |
5068 | drm_client_dev_suspend(dev: adev_to_drm(adev), holds_console_lock: false); |
5069 | |
5070 | cancel_delayed_work_sync(dwork: &adev->delayed_init_work); |
5071 | |
5072 | amdgpu_ras_suspend(adev); |
5073 | |
5074 | amdgpu_device_ip_suspend_phase1(adev); |
5075 | |
5076 | if (!adev->in_s0ix) { |
5077 | amdgpu_amdkfd_suspend(adev, run_pm: adev->in_runpm); |
5078 | amdgpu_userq_suspend(adev); |
5079 | } |
5080 | |
5081 | r = amdgpu_device_evict_resources(adev); |
5082 | if (r) |
5083 | return r; |
5084 | |
5085 | amdgpu_ttm_set_buffer_funcs_status(adev, enable: false); |
5086 | |
5087 | amdgpu_fence_driver_hw_fini(adev); |
5088 | |
5089 | amdgpu_device_ip_suspend_phase2(adev); |
5090 | |
5091 | if (amdgpu_sriov_vf(adev)) |
5092 | amdgpu_virt_release_full_gpu(adev, init: false); |
5093 | |
5094 | r = amdgpu_dpm_notify_rlc_state(adev, en: false); |
5095 | if (r) |
5096 | return r; |
5097 | |
5098 | return 0; |
5099 | } |
5100 | |
5101 | /** |
5102 | * amdgpu_device_resume - initiate device resume |
5103 | * |
5104 | * @dev: drm dev pointer |
5105 | * @notify_clients: notify in-kernel DRM clients |
5106 | * |
5107 | * Bring the hw back to operating state (all asics). |
5108 | * Returns 0 for success or an error on failure. |
5109 | * Called at driver resume. |
5110 | */ |
5111 | int amdgpu_device_resume(struct drm_device *dev, bool notify_clients) |
5112 | { |
5113 | struct amdgpu_device *adev = drm_to_adev(ddev: dev); |
5114 | int r = 0; |
5115 | |
5116 | if (amdgpu_sriov_vf(adev)) { |
5117 | r = amdgpu_virt_request_full_gpu(adev, init: true); |
5118 | if (r) |
5119 | return r; |
5120 | } |
5121 | |
5122 | if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) |
5123 | return 0; |
5124 | |
5125 | if (adev->in_s0ix) |
5126 | amdgpu_dpm_gfx_state_change(adev, state: sGpuChangeState_D0Entry); |
5127 | |
5128 | /* post card */ |
5129 | if (amdgpu_device_need_post(adev)) { |
5130 | r = amdgpu_device_asic_init(adev); |
5131 | if (r) |
5132 | dev_err(adev->dev, "amdgpu asic init failed\n"); |
5133 | } |
5134 | |
5135 | r = amdgpu_device_ip_resume(adev); |
5136 | |
5137 | if (r) { |
5138 | dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r); |
5139 | goto exit; |
5140 | } |
5141 | |
5142 | if (!adev->in_s0ix) { |
5143 | r = amdgpu_amdkfd_resume(adev, run_pm: adev->in_runpm); |
5144 | if (r) |
5145 | goto exit; |
5146 | |
5147 | r = amdgpu_userq_resume(adev); |
5148 | if (r) |
5149 | goto exit; |
5150 | } |
5151 | |
5152 | r = amdgpu_device_ip_late_init(adev); |
5153 | if (r) |
5154 | goto exit; |
5155 | |
5156 | queue_delayed_work(wq: system_wq, dwork: &adev->delayed_init_work, |
5157 | delay: msecs_to_jiffies(AMDGPU_RESUME_MS)); |
5158 | exit: |
5159 | if (amdgpu_sriov_vf(adev)) { |
5160 | amdgpu_virt_init_data_exchange(adev); |
5161 | amdgpu_virt_release_full_gpu(adev, init: true); |
5162 | } |
5163 | |
5164 | if (r) |
5165 | return r; |
5166 | |
5167 | /* Make sure IB tests flushed */ |
5168 | flush_delayed_work(dwork: &adev->delayed_init_work); |
5169 | |
5170 | if (notify_clients) |
5171 | drm_client_dev_resume(dev: adev_to_drm(adev), holds_console_lock: false); |
5172 | |
5173 | amdgpu_ras_resume(adev); |
5174 | |
5175 | if (adev->mode_info.num_crtc) { |
5176 | /* |
5177 | * Most of the connector probing functions try to acquire runtime pm |
5178 | * refs to ensure that the GPU is powered on when connector polling is |
5179 | * performed. Since we're calling this from a runtime PM callback, |
5180 | * trying to acquire rpm refs will cause us to deadlock. |
5181 | * |
5182 | * Since we're guaranteed to be holding the rpm lock, it's safe to |
5183 | * temporarily disable the rpm helpers so this doesn't deadlock us. |
5184 | */ |
5185 | #ifdef CONFIG_PM |
5186 | dev->dev->power.disable_depth++; |
5187 | #endif |
5188 | if (!adev->dc_enabled) |
5189 | drm_helper_hpd_irq_event(dev); |
5190 | else |
5191 | drm_kms_helper_hotplug_event(dev); |
5192 | #ifdef CONFIG_PM |
5193 | dev->dev->power.disable_depth--; |
5194 | #endif |
5195 | } |
5196 | adev->in_suspend = false; |
5197 | |
5198 | if (amdgpu_acpi_smart_shift_update(dev, ss_state: AMDGPU_SS_DEV_D0)) |
5199 | DRM_WARN("smart shift update failed\n"); |
5200 | |
5201 | return 0; |
5202 | } |
5203 | |
5204 | /** |
5205 | * amdgpu_device_ip_check_soft_reset - did soft reset succeed |
5206 | * |
5207 | * @adev: amdgpu_device pointer |
5208 | * |
5209 | * The list of all the hardware IPs that make up the asic is walked and |
5210 | * the check_soft_reset callbacks are run. check_soft_reset determines |
5211 | * if the asic is still hung or not. |
5212 | * Returns true if any of the IPs are still in a hung state, false if not. |
5213 | */ |
5214 | static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev) |
5215 | { |
5216 | int i; |
5217 | bool asic_hang = false; |
5218 | |
5219 | if (amdgpu_sriov_vf(adev)) |
5220 | return true; |
5221 | |
5222 | if (amdgpu_asic_need_full_reset(adev)) |
5223 | return true; |
5224 | |
5225 | for (i = 0; i < adev->num_ip_blocks; i++) { |
5226 | if (!adev->ip_blocks[i].status.valid) |
5227 | continue; |
5228 | if (adev->ip_blocks[i].version->funcs->check_soft_reset) |
5229 | adev->ip_blocks[i].status.hang = |
5230 | adev->ip_blocks[i].version->funcs->check_soft_reset( |
5231 | &adev->ip_blocks[i]); |
5232 | if (adev->ip_blocks[i].status.hang) { |
5233 | dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name); |
5234 | asic_hang = true; |
5235 | } |
5236 | } |
5237 | return asic_hang; |
5238 | } |
5239 | |
5240 | /** |
5241 | * amdgpu_device_ip_pre_soft_reset - prepare for soft reset |
5242 | * |
5243 | * @adev: amdgpu_device pointer |
5244 | * |
5245 | * The list of all the hardware IPs that make up the asic is walked and the |
5246 | * pre_soft_reset callbacks are run if the block is hung. pre_soft_reset |
5247 | * handles any IP specific hardware or software state changes that are |
5248 | * necessary for a soft reset to succeed. |
5249 | * Returns 0 on success, negative error code on failure. |
5250 | */ |
5251 | static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev) |
5252 | { |
5253 | int i, r = 0; |
5254 | |
5255 | for (i = 0; i < adev->num_ip_blocks; i++) { |
5256 | if (!adev->ip_blocks[i].status.valid) |
5257 | continue; |
5258 | if (adev->ip_blocks[i].status.hang && |
5259 | adev->ip_blocks[i].version->funcs->pre_soft_reset) { |
5260 | r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]); |
5261 | if (r) |
5262 | return r; |
5263 | } |
5264 | } |
5265 | |
5266 | return 0; |
5267 | } |
5268 | |
5269 | /** |
5270 | * amdgpu_device_ip_need_full_reset - check if a full asic reset is needed |
5271 | * |
5272 | * @adev: amdgpu_device pointer |
5273 | * |
5274 | * Some hardware IPs cannot be soft reset. If they are hung, a full gpu |
5275 | * reset is necessary to recover. |
5276 | * Returns true if a full asic reset is required, false if not. |
5277 | */ |
5278 | static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev) |
5279 | { |
5280 | int i; |
5281 | |
5282 | if (amdgpu_asic_need_full_reset(adev)) |
5283 | return true; |
5284 | |
5285 | for (i = 0; i < adev->num_ip_blocks; i++) { |
5286 | if (!adev->ip_blocks[i].status.valid) |
5287 | continue; |
5288 | if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) || |
5289 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) || |
5290 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) || |
5291 | (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) || |
5292 | adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { |
5293 | if (adev->ip_blocks[i].status.hang) { |
5294 | dev_info(adev->dev, "Some block need full reset!\n"); |
5295 | return true; |
5296 | } |
5297 | } |
5298 | } |
5299 | return false; |
5300 | } |
5301 | |
5302 | /** |
5303 | * amdgpu_device_ip_soft_reset - do a soft reset |
5304 | * |
5305 | * @adev: amdgpu_device pointer |
5306 | * |
5307 | * The list of all the hardware IPs that make up the asic is walked and the |
5308 | * soft_reset callbacks are run if the block is hung. soft_reset handles any |
5309 | * IP specific hardware or software state changes that are necessary to soft |
5310 | * reset the IP. |
5311 | * Returns 0 on success, negative error code on failure. |
5312 | */ |
5313 | static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev) |
5314 | { |
5315 | int i, r = 0; |
5316 | |
5317 | for (i = 0; i < adev->num_ip_blocks; i++) { |
5318 | if (!adev->ip_blocks[i].status.valid) |
5319 | continue; |
5320 | if (adev->ip_blocks[i].status.hang && |
5321 | adev->ip_blocks[i].version->funcs->soft_reset) { |
5322 | r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]); |
5323 | if (r) |
5324 | return r; |
5325 | } |
5326 | } |
5327 | |
5328 | return 0; |
5329 | } |
5330 | |
5331 | /** |
5332 | * amdgpu_device_ip_post_soft_reset - clean up from soft reset |
5333 | * |
5334 | * @adev: amdgpu_device pointer |
5335 | * |
5336 | * The list of all the hardware IPs that make up the asic is walked and the |
5337 | * post_soft_reset callbacks are run if the asic was hung. post_soft_reset |
5338 | * handles any IP specific hardware or software state changes that are |
5339 | * necessary after the IP has been soft reset. |
5340 | * Returns 0 on success, negative error code on failure. |
5341 | */ |
5342 | static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) |
5343 | { |
5344 | int i, r = 0; |
5345 | |
5346 | for (i = 0; i < adev->num_ip_blocks; i++) { |
5347 | if (!adev->ip_blocks[i].status.valid) |
5348 | continue; |
5349 | if (adev->ip_blocks[i].status.hang && |
5350 | adev->ip_blocks[i].version->funcs->post_soft_reset) |
5351 | r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]); |
5352 | if (r) |
5353 | return r; |
5354 | } |
5355 | |
5356 | return 0; |
5357 | } |
5358 | |
5359 | /** |
5360 | * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf |
5361 | * |
5362 | * @adev: amdgpu_device pointer |
5363 | * @reset_context: amdgpu reset context pointer |
5364 | * |
5365 | * do VF FLR and reinitialize Asic |
5366 | * return 0 means succeeded otherwise failed |
5367 | */ |
5368 | static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, |
5369 | struct amdgpu_reset_context *reset_context) |
5370 | { |
5371 | int r; |
5372 | struct amdgpu_hive_info *hive = NULL; |
5373 | |
5374 | if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) { |
5375 | if (!amdgpu_ras_get_fed_status(adev)) |
5376 | amdgpu_virt_ready_to_reset(adev); |
5377 | amdgpu_virt_wait_reset(adev); |
		clear_bit(AMDGPU_HOST_FLR, &reset_context->flags);
		r = amdgpu_virt_request_full_gpu(adev, true);
	} else {
		r = amdgpu_virt_reset_gpu(adev);
	}
	if (r)
		return r;

	amdgpu_ras_clear_err_state(adev);
	amdgpu_irq_gpu_reset_resume_helper(adev);

	/* some SW cleanup the VF needs to do before recovery */
	amdgpu_virt_post_reset(adev);
5391 | |
5392 | /* Resume IP prior to SMC */ |
5393 | r = amdgpu_device_ip_reinit_early_sriov(adev); |
5394 | if (r) |
5395 | return r; |
5396 | |
5397 | amdgpu_virt_init_data_exchange(adev); |
5398 | |
5399 | r = amdgpu_device_fw_loading(adev); |
5400 | if (r) |
5401 | return r; |
5402 | |
5403 | /* now we are okay to resume SMC/CP/SDMA */ |
5404 | r = amdgpu_device_ip_reinit_late_sriov(adev); |
5405 | if (r) |
5406 | return r; |
5407 | |
5408 | hive = amdgpu_get_xgmi_hive(adev); |
5409 | /* Update PSP FW topology after reset */ |
5410 | if (hive && adev->gmc.xgmi.num_physical_nodes > 1) |
5411 | r = amdgpu_xgmi_update_topology(hive, adev); |
5412 | if (hive) |
5413 | amdgpu_put_xgmi_hive(hive); |
5414 | if (r) |
5415 | return r; |
5416 | |
5417 | r = amdgpu_ib_ring_tests(adev); |
5418 | if (r) |
5419 | return r; |
5420 | |
5421 | if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) |
5422 | amdgpu_inc_vram_lost(adev); |
5423 | |
	/* needs to be called during full access, so we can't do it later like
	 * bare-metal does.
	 */
	amdgpu_amdkfd_post_reset(adev);
	amdgpu_virt_release_full_gpu(adev, true);
5429 | |
5430 | /* Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset */ |
5431 | if (amdgpu_ip_version(adev, ip: GC_HWIP, inst: 0) == IP_VERSION(9, 4, 2) || |
5432 | amdgpu_ip_version(adev, ip: GC_HWIP, inst: 0) == IP_VERSION(9, 4, 3) || |
5433 | amdgpu_ip_version(adev, ip: GC_HWIP, inst: 0) == IP_VERSION(9, 4, 4) || |
5434 | amdgpu_ip_version(adev, ip: GC_HWIP, inst: 0) == IP_VERSION(9, 5, 0) || |
5435 | amdgpu_ip_version(adev, ip: GC_HWIP, inst: 0) == IP_VERSION(11, 0, 3)) |
5436 | amdgpu_ras_resume(adev); |
5437 | |
5438 | amdgpu_virt_ras_telemetry_post_reset(adev); |
5439 | |
5440 | return 0; |
5441 | } |
5442 | |
5443 | /** |
5444 | * amdgpu_device_has_job_running - check if there is any unfinished job |
5445 | * |
5446 | * @adev: amdgpu_device pointer |
5447 | * |
 * Check if there are any jobs running on the device when the guest driver
 * receives an FLR notification from the host driver. If jobs are still
 * running, the guest driver will not respond to the FLR. Instead, it lets the
 * job hit the timeout and then issues the reset request itself.
5452 | */ |
5453 | bool amdgpu_device_has_job_running(struct amdgpu_device *adev) |
5454 | { |
5455 | int i; |
5456 | |
5457 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
5458 | struct amdgpu_ring *ring = adev->rings[i]; |
5459 | |
5460 | if (!amdgpu_ring_sched_ready(ring)) |
5461 | continue; |
5462 | |
5463 | if (amdgpu_fence_count_emitted(ring)) |
5464 | return true; |
5465 | } |
5466 | return false; |
5467 | } |
5468 | |
5469 | /** |
5470 | * amdgpu_device_should_recover_gpu - check if we should try GPU recovery |
5471 | * |
5472 | * @adev: amdgpu_device pointer |
5473 | * |
5474 | * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover |
5475 | * a hung GPU. |
5476 | */ |
5477 | bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) |
5478 | { |
5479 | |
5480 | if (amdgpu_gpu_recovery == 0) |
5481 | goto disabled; |
5482 | |
5483 | /* Skip soft reset check in fatal error mode */ |
5484 | if (!amdgpu_ras_is_poison_mode_supported(adev)) |
5485 | return true; |
5486 | |
5487 | if (amdgpu_sriov_vf(adev)) |
5488 | return true; |
5489 | |
5490 | if (amdgpu_gpu_recovery == -1) { |
5491 | switch (adev->asic_type) { |
5492 | #ifdef CONFIG_DRM_AMDGPU_SI |
5493 | case CHIP_VERDE: |
5494 | case CHIP_TAHITI: |
5495 | case CHIP_PITCAIRN: |
5496 | case CHIP_OLAND: |
5497 | case CHIP_HAINAN: |
5498 | #endif |
5499 | #ifdef CONFIG_DRM_AMDGPU_CIK |
5500 | case CHIP_KAVERI: |
5501 | case CHIP_KABINI: |
5502 | case CHIP_MULLINS: |
5503 | #endif |
5504 | case CHIP_CARRIZO: |
5505 | case CHIP_STONEY: |
5506 | case CHIP_CYAN_SKILLFISH: |
5507 | goto disabled; |
5508 | default: |
5509 | break; |
5510 | } |
5511 | } |
5512 | |
5513 | return true; |
5514 | |
5515 | disabled: |
5516 | dev_info(adev->dev, "GPU recovery disabled.\n"); |
5517 | return false; |
5518 | } |
5519 | |
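/**
 * amdgpu_device_mode1_reset - perform a full ASIC mode1 reset
 *
 * @adev: amdgpu_device pointer
 *
 * Caches the PCI config space, disables bus mastering and triggers a
 * mode1 reset through either the SMU or the PSP, then waits for the
 * ASIC to come back out of reset.
 * Returns 0 on success, negative error code on failure.
 */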
5520 | int amdgpu_device_mode1_reset(struct amdgpu_device *adev) |
5521 | { |
5522 | u32 i; |
5523 | int ret = 0; |
5524 | |
	if (adev->bios)
		amdgpu_atombios_scratch_regs_engine_hung(adev, true);
5527 | |
5528 | dev_info(adev->dev, "GPU mode1 reset\n"); |
5529 | |
5530 | /* Cache the state before bus master disable. The saved config space |
5531 | * values are used in other cases like restore after mode-2 reset. |
5532 | */ |
	amdgpu_device_cache_pci_state(adev->pdev);

	/* disable BM */
	pci_clear_master(adev->pdev);
5537 | |
5538 | if (amdgpu_dpm_is_mode1_reset_supported(adev)) { |
5539 | dev_info(adev->dev, "GPU smu mode1 reset\n"); |
5540 | ret = amdgpu_dpm_mode1_reset(adev); |
5541 | } else { |
5542 | dev_info(adev->dev, "GPU psp mode1 reset\n"); |
5543 | ret = psp_gpu_reset(adev); |
5544 | } |
5545 | |
5546 | if (ret) |
5547 | goto mode1_reset_failed; |
5548 | |
	amdgpu_device_load_pci_state(adev->pdev);
5550 | ret = amdgpu_psp_wait_for_bootloader(adev); |
5551 | if (ret) |
5552 | goto mode1_reset_failed; |
5553 | |
5554 | /* wait for asic to come out of reset */ |
5555 | for (i = 0; i < adev->usec_timeout; i++) { |
5556 | u32 memsize = adev->nbio.funcs->get_memsize(adev); |
5557 | |
5558 | if (memsize != 0xffffffff) |
5559 | break; |
		udelay(1);
5561 | } |
5562 | |
5563 | if (i >= adev->usec_timeout) { |
5564 | ret = -ETIMEDOUT; |
5565 | goto mode1_reset_failed; |
5566 | } |
5567 | |
	if (adev->bios)
		amdgpu_atombios_scratch_regs_engine_hung(adev, false);
5570 | |
5571 | return 0; |
5572 | |
5573 | mode1_reset_failed: |
5574 | dev_err(adev->dev, "GPU mode1 reset failed\n"); |
5575 | return ret; |
5576 | } |
5577 | |
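/**
 * amdgpu_device_link_reset - perform a PCIe link reset
 *
 * @adev: amdgpu_device pointer
 *
 * Resets the PCIe link through DPM (unless a DPC event already reset it)
 * and waits for the PSP bootloader to become available again.
 * Returns 0 on success, negative error code on failure.
 */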
5578 | int amdgpu_device_link_reset(struct amdgpu_device *adev) |
5579 | { |
5580 | int ret = 0; |
5581 | |
5582 | dev_info(adev->dev, "GPU link reset\n"); |
5583 | |
5584 | if (!adev->pcie_reset_ctx.occurs_dpc) |
5585 | ret = amdgpu_dpm_link_reset(adev); |
5586 | |
5587 | if (ret) |
5588 | goto link_reset_failed; |
5589 | |
5590 | ret = amdgpu_psp_wait_for_bootloader(adev); |
5591 | if (ret) |
5592 | goto link_reset_failed; |
5593 | |
5594 | return 0; |
5595 | |
5596 | link_reset_failed: |
5597 | dev_err(adev->dev, "GPU link reset failed\n"); |
5598 | return ret; |
5599 | } |
5600 | |
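/**
 * amdgpu_device_pre_asic_reset - prepare the device for an ASIC reset
 *
 * @adev: amdgpu_device pointer
 * @reset_context: amdgpu reset context pointer
 *
 * Clears and force-completes the hardware fences of all rings, tries a
 * soft reset of the hung IP blocks where possible, and decides whether
 * a full reset (including IP suspend) is required.
 * Returns 0 on success, negative error code on failure.
 */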
5601 | int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, |
5602 | struct amdgpu_reset_context *reset_context) |
5603 | { |
5604 | int i, r = 0; |
5605 | struct amdgpu_job *job = NULL; |
5606 | struct amdgpu_device *tmp_adev = reset_context->reset_req_dev; |
5607 | bool need_full_reset = |
5608 | test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); |
5609 | |
5610 | if (reset_context->reset_req_dev == adev) |
5611 | job = reset_context->job; |
5612 | |
5613 | if (amdgpu_sriov_vf(adev)) |
5614 | amdgpu_virt_pre_reset(adev); |
5615 | |
5616 | amdgpu_fence_driver_isr_toggle(adev, stop: true); |
5617 | |
5618 | /* block all schedulers and reset given job's ring */ |
5619 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
5620 | struct amdgpu_ring *ring = adev->rings[i]; |
5621 | |
5622 | if (!amdgpu_ring_sched_ready(ring)) |
5623 | continue; |
5624 | |
		/* Clear job fences from the fence driver so that force_completion
		 * doesn't leave NULL and vm flush fences in the fence driver.
		 */
		amdgpu_fence_driver_clear_job_fences(ring);
5629 | |
5630 | /* after all hw jobs are reset, hw fence is meaningless, so force_completion */ |
5631 | amdgpu_fence_driver_force_completion(ring); |
5632 | } |
5633 | |
5634 | amdgpu_fence_driver_isr_toggle(adev, stop: false); |
5635 | |
5636 | if (job && job->vm) |
5637 | drm_sched_increase_karma(bad: &job->base); |
5638 | |
5639 | r = amdgpu_reset_prepare_hwcontext(adev, reset_context); |
5640 | /* If reset handler not implemented, continue; otherwise return */ |
5641 | if (r == -EOPNOTSUPP) |
5642 | r = 0; |
5643 | else |
5644 | return r; |
5645 | |
5646 | /* Don't suspend on bare metal if we are not going to HW reset the ASIC */ |
5647 | if (!amdgpu_sriov_vf(adev)) { |
5648 | |
5649 | if (!need_full_reset) |
5650 | need_full_reset = amdgpu_device_ip_need_full_reset(adev); |
5651 | |
5652 | if (!need_full_reset && amdgpu_gpu_recovery && |
5653 | amdgpu_device_ip_check_soft_reset(adev)) { |
5654 | amdgpu_device_ip_pre_soft_reset(adev); |
5655 | r = amdgpu_device_ip_soft_reset(adev); |
5656 | amdgpu_device_ip_post_soft_reset(adev); |
5657 | if (r || amdgpu_device_ip_check_soft_reset(adev)) { |
5658 | dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n"); |
5659 | need_full_reset = true; |
5660 | } |
5661 | } |
5662 | |
5663 | if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) { |
5664 | dev_info(tmp_adev->dev, "Dumping IP State\n"); |
5665 | /* Trigger ip dump before we reset the asic */ |
5666 | for (i = 0; i < tmp_adev->num_ip_blocks; i++) |
5667 | if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state) |
5668 | tmp_adev->ip_blocks[i].version->funcs |
5669 | ->dump_ip_state((void *)&tmp_adev->ip_blocks[i]); |
5670 | dev_info(tmp_adev->dev, "Dumping IP State Completed\n"); |
5671 | } |
5672 | |
5673 | if (need_full_reset) |
5674 | r = amdgpu_device_ip_suspend(adev); |
5675 | if (need_full_reset) |
5676 | set_bit(nr: AMDGPU_NEED_FULL_RESET, addr: &reset_context->flags); |
5677 | else |
5678 | clear_bit(nr: AMDGPU_NEED_FULL_RESET, |
5679 | addr: &reset_context->flags); |
5680 | } |
5681 | |
5682 | return r; |
5683 | } |
5684 | |
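/**
 * amdgpu_device_reinit_after_reset - reinitialize devices after a reset
 *
 * @reset_context: amdgpu reset context pointer
 *
 * Walks the reset device list and, if a full reset was done, re-posts
 * each ASIC, resumes its IP blocks, reloads firmware, restores the XCP
 * partition mode and resumes RAS, then runs the IB ring tests.
 * Returns 0 on success, negative error code on failure.
 */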
5685 | int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context) |
5686 | { |
5687 | struct list_head *device_list_handle; |
5688 | bool full_reset, vram_lost = false; |
5689 | struct amdgpu_device *tmp_adev; |
5690 | int r, init_level; |
5691 | |
5692 | device_list_handle = reset_context->reset_device_list; |
5693 | |
5694 | if (!device_list_handle) |
5695 | return -EINVAL; |
5696 | |
5697 | full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); |
5698 | |
	/*
	 * If it's a reset on init, use the default init level; otherwise keep
	 * the level as the recovery level.
	 */
5703 | if (reset_context->method == AMD_RESET_METHOD_ON_INIT) |
5704 | init_level = AMDGPU_INIT_LEVEL_DEFAULT; |
5705 | else |
5706 | init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY; |
5707 | |
5708 | r = 0; |
5709 | list_for_each_entry(tmp_adev, device_list_handle, reset_list) { |
5710 | amdgpu_set_init_level(adev: tmp_adev, lvl: init_level); |
5711 | if (full_reset) { |
5712 | /* post card */ |
5713 | amdgpu_ras_clear_err_state(adev: tmp_adev); |
5714 | r = amdgpu_device_asic_init(adev: tmp_adev); |
5715 | if (r) { |
5716 | dev_warn(tmp_adev->dev, "asic atom init failed!"); |
5717 | } else { |
5718 | dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); |
5719 | |
5720 | r = amdgpu_device_ip_resume_phase1(adev: tmp_adev); |
5721 | if (r) |
5722 | goto out; |
5723 | |
5724 | vram_lost = amdgpu_device_check_vram_lost(adev: tmp_adev); |
5725 | |
5726 | if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) |
5727 | amdgpu_coredump(adev: tmp_adev, skip_vram_check: false, vram_lost, job: reset_context->job); |
5728 | |
5729 | if (vram_lost) { |
5730 | DRM_INFO("VRAM is lost due to GPU reset!\n"); |
5731 | amdgpu_inc_vram_lost(tmp_adev); |
5732 | } |
5733 | |
5734 | r = amdgpu_device_fw_loading(adev: tmp_adev); |
5735 | if (r) |
5736 | return r; |
5737 | |
5738 | r = amdgpu_xcp_restore_partition_mode( |
5739 | xcp_mgr: tmp_adev->xcp_mgr); |
5740 | if (r) |
5741 | goto out; |
5742 | |
5743 | r = amdgpu_device_ip_resume_phase2(adev: tmp_adev); |
5744 | if (r) |
5745 | goto out; |
5746 | |
5747 | if (tmp_adev->mman.buffer_funcs_ring->sched.ready) |
5748 | amdgpu_ttm_set_buffer_funcs_status(adev: tmp_adev, enable: true); |
5749 | |
5750 | r = amdgpu_device_ip_resume_phase3(adev: tmp_adev); |
5751 | if (r) |
5752 | goto out; |
5753 | |
5754 | if (vram_lost) |
5755 | amdgpu_device_fill_reset_magic(adev: tmp_adev); |
5756 | |
5757 | /* |
				 * Add this ASIC back as tracked since the reset
				 * completed successfully.
5760 | */ |
5761 | amdgpu_register_gpu_instance(adev: tmp_adev); |
5762 | |
5763 | if (!reset_context->hive && |
5764 | tmp_adev->gmc.xgmi.num_physical_nodes > 1) |
5765 | amdgpu_xgmi_add_device(adev: tmp_adev); |
5766 | |
5767 | r = amdgpu_device_ip_late_init(adev: tmp_adev); |
5768 | if (r) |
5769 | goto out; |
5770 | |
5771 | drm_client_dev_resume(dev: adev_to_drm(adev: tmp_adev), holds_console_lock: false); |
5772 | |
				/*
				 * The GPU enters a bad state once the number of
				 * faulty pages detected by ECC reaches the
				 * threshold, and RAS recovery is scheduled next.
				 * So add a check here to break recovery if the
				 * bad page threshold is indeed exceeded, and
				 * remind the user to retire this GPU or set a
				 * bigger bad_page_threshold value to fix this
				 * when probing the driver again.
				 */
5783 | if (!amdgpu_ras_is_rma(adev: tmp_adev)) { |
5784 | /* must succeed. */ |
5785 | amdgpu_ras_resume(adev: tmp_adev); |
5786 | } else { |
5787 | r = -EINVAL; |
5788 | goto out; |
5789 | } |
5790 | |
5791 | /* Update PSP FW topology after reset */ |
5792 | if (reset_context->hive && |
5793 | tmp_adev->gmc.xgmi.num_physical_nodes > 1) |
5794 | r = amdgpu_xgmi_update_topology( |
5795 | hive: reset_context->hive, adev: tmp_adev); |
5796 | } |
5797 | } |
5798 | |
5799 | out: |
5800 | if (!r) { |
5801 | /* IP init is complete now, set level as default */ |
5802 | amdgpu_set_init_level(adev: tmp_adev, |
5803 | lvl: AMDGPU_INIT_LEVEL_DEFAULT); |
5804 | amdgpu_irq_gpu_reset_resume_helper(adev: tmp_adev); |
5805 | r = amdgpu_ib_ring_tests(adev: tmp_adev); |
5806 | if (r) { |
5807 | dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); |
5808 | r = -EAGAIN; |
5809 | goto end; |
5810 | } |
5811 | } |
5812 | |
5813 | if (r) |
5814 | tmp_adev->asic_reset_res = r; |
5815 | } |
5816 | |
5817 | end: |
5818 | return r; |
5819 | } |
5820 | |
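/**
 * amdgpu_do_asic_reset - reset all ASICs in the device list
 *
 * @device_list_handle: list of devices to reset
 * @reset_context: amdgpu reset context pointer
 *
 * Tries the dedicated reset handler first and falls back to the default
 * method: for XGMI hives all node resets are run in parallel, then the
 * devices are reinitialized via amdgpu_device_reinit_after_reset().
 * Returns 0 on success, negative error code on failure.
 */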
5821 | int amdgpu_do_asic_reset(struct list_head *device_list_handle, |
5822 | struct amdgpu_reset_context *reset_context) |
5823 | { |
5824 | struct amdgpu_device *tmp_adev = NULL; |
5825 | bool need_full_reset, skip_hw_reset; |
5826 | int r = 0; |
5827 | |
5828 | /* Try reset handler method first */ |
5829 | tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, |
5830 | reset_list); |
5831 | |
5832 | reset_context->reset_device_list = device_list_handle; |
5833 | r = amdgpu_reset_perform_reset(adev: tmp_adev, reset_context); |
5834 | /* If reset handler not implemented, continue; otherwise return */ |
5835 | if (r == -EOPNOTSUPP) |
5836 | r = 0; |
5837 | else |
5838 | return r; |
5839 | |
5840 | /* Reset handler not implemented, use the default method */ |
5841 | need_full_reset = |
5842 | test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags); |
5843 | skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags); |
5844 | |
5845 | /* |
5846 | * ASIC reset has to be done on all XGMI hive nodes ASAP |
5847 | * to allow proper links negotiation in FW (within 1 sec) |
5848 | */ |
5849 | if (!skip_hw_reset && need_full_reset) { |
5850 | list_for_each_entry(tmp_adev, device_list_handle, reset_list) { |
5851 | /* For XGMI run all resets in parallel to speed up the process */ |
5852 | if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { |
5853 | if (!queue_work(wq: system_unbound_wq, |
5854 | work: &tmp_adev->xgmi_reset_work)) |
5855 | r = -EALREADY; |
5856 | } else |
5857 | r = amdgpu_asic_reset(tmp_adev); |
5858 | |
5859 | if (r) { |
5860 | dev_err(tmp_adev->dev, |
5861 | "ASIC reset failed with error, %d for drm dev, %s", |
5862 | r, adev_to_drm(tmp_adev)->unique); |
5863 | goto out; |
5864 | } |
5865 | } |
5866 | |
5867 | /* For XGMI wait for all resets to complete before proceed */ |
5868 | if (!r) { |
5869 | list_for_each_entry(tmp_adev, device_list_handle, |
5870 | reset_list) { |
5871 | if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { |
5872 | flush_work(work: &tmp_adev->xgmi_reset_work); |
5873 | r = tmp_adev->asic_reset_res; |
5874 | if (r) |
5875 | break; |
5876 | } |
5877 | } |
5878 | } |
5879 | } |
5880 | |
5881 | if (!r && amdgpu_ras_intr_triggered()) { |
5882 | list_for_each_entry(tmp_adev, device_list_handle, reset_list) { |
5883 | amdgpu_ras_reset_error_count(adev: tmp_adev, |
5884 | block: AMDGPU_RAS_BLOCK__MMHUB); |
5885 | } |
5886 | |
5887 | amdgpu_ras_intr_cleared(); |
5888 | } |
5889 | |
5890 | r = amdgpu_device_reinit_after_reset(reset_context); |
5891 | if (r == -EAGAIN) |
5892 | set_bit(nr: AMDGPU_NEED_FULL_RESET, addr: &reset_context->flags); |
5893 | else |
5894 | clear_bit(nr: AMDGPU_NEED_FULL_RESET, addr: &reset_context->flags); |
5895 | |
5896 | out: |
5897 | return r; |
5898 | } |
5899 | |
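/**
 * amdgpu_device_set_mp1_state - set the MP1 state for the pending reset
 *
 * @adev: amdgpu_device pointer
 *
 * Selects the MP1 (SMU) state matching the reset method that is about
 * to be used.
 */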
5900 | static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev) |
5901 | { |
5902 | |
5903 | switch (amdgpu_asic_reset_method(adev)) { |
5904 | case AMD_RESET_METHOD_MODE1: |
5905 | case AMD_RESET_METHOD_LINK: |
5906 | adev->mp1_state = PP_MP1_STATE_SHUTDOWN; |
5907 | break; |
5908 | case AMD_RESET_METHOD_MODE2: |
5909 | adev->mp1_state = PP_MP1_STATE_RESET; |
5910 | break; |
5911 | default: |
5912 | adev->mp1_state = PP_MP1_STATE_NONE; |
5913 | break; |
5914 | } |
5915 | } |
5916 | |
5917 | static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev) |
5918 | { |
5919 | amdgpu_vf_error_trans_all(adev); |
5920 | adev->mp1_state = PP_MP1_STATE_NONE; |
5921 | } |
5922 | |
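/**
 * amdgpu_device_resume_display_audio - resume the GPU display audio function
 *
 * @adev: amdgpu_device pointer
 *
 * Re-enables runtime PM on the audio function of the GPU and resumes it
 * after a GPU reset.
 */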
5923 | static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev) |
5924 | { |
5925 | struct pci_dev *p = NULL; |
5926 | |
	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
					adev->pdev->bus->number, 1);
	if (p) {
		pm_runtime_enable(&(p->dev));
		pm_runtime_resume(&(p->dev));
	}

	pci_dev_put(p);
5935 | } |
5936 | |
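/**
 * amdgpu_device_suspend_display_audio - suspend the GPU display audio function
 *
 * @adev: amdgpu_device pointer
 *
 * Puts the audio function of the GPU into runtime suspend before a BACO
 * or mode1 reset so that the reset does not disturb the audio driver.
 * Returns 0 on success, negative error code on failure or timeout.
 */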
5937 | static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev) |
5938 | { |
5939 | enum amd_reset_method reset_method; |
5940 | struct pci_dev *p = NULL; |
5941 | u64 expires; |
5942 | |
5943 | /* |
	 * For now, only BACO and mode1 reset are confirmed to suffer from the
	 * audio issue if the audio device is not properly suspended.
5946 | */ |
5947 | reset_method = amdgpu_asic_reset_method(adev); |
5948 | if ((reset_method != AMD_RESET_METHOD_BACO) && |
5949 | (reset_method != AMD_RESET_METHOD_MODE1)) |
5950 | return -EINVAL; |
5951 | |
	p = pci_get_domain_bus_and_slot(pci_domain_nr(adev->pdev->bus),
					adev->pdev->bus->number, 1);
	if (!p)
		return -ENODEV;

	expires = pm_runtime_autosuspend_expiration(&(p->dev));
5958 | if (!expires) |
5959 | /* |
		 * If we cannot get the audio device's autosuspend delay, a
		 * fixed 4s interval is used. Since 3s is the audio
		 * controller's default autosuspend delay, the 4s used here is
		 * guaranteed to cover it.
5964 | */ |
5965 | expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * 4ULL; |
5966 | |
	while (!pm_runtime_status_suspended(&(p->dev))) {
		if (!pm_runtime_suspend(&(p->dev)))
			break;

		if (expires < ktime_get_mono_fast_ns()) {
			dev_warn(adev->dev, "failed to suspend display audio\n");
			pci_dev_put(p);
			/* TODO: abort the succeeding gpu reset? */
			return -ETIMEDOUT;
		}
	}

	pm_runtime_disable(&(p->dev));

	pci_dev_put(p);
5982 | return 0; |
5983 | } |
5984 | |
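/* Cancel all non-scheduler reset work that is still pending. */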
5985 | static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev) |
5986 | { |
5987 | struct amdgpu_ras *con = amdgpu_ras_get_context(adev); |
5988 | |
5989 | #if defined(CONFIG_DEBUG_FS) |
5990 | if (!amdgpu_sriov_vf(adev)) |
		cancel_work(&adev->reset_work);
#endif

	if (adev->kfd.dev)
		cancel_work(&adev->kfd.reset_work);

	if (amdgpu_sriov_vf(adev))
		cancel_work(&adev->virt.flr_work);

	if (con && adev->ras_enabled)
		cancel_work(&con->recovery_work);
6002 | |
6003 | } |
6004 | |
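/**
 * amdgpu_device_health_check - check that all devices are still on the bus
 *
 * @device_list_handle: list of devices to check
 *
 * Reads the PCI command register of every device in the list and returns
 * -ENODEV if any of them appears to have dropped off the bus.
 */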
6005 | static int amdgpu_device_health_check(struct list_head *device_list_handle) |
6006 | { |
6007 | struct amdgpu_device *tmp_adev; |
6008 | int ret = 0; |
6009 | u32 status; |
6010 | |
6011 | list_for_each_entry(tmp_adev, device_list_handle, reset_list) { |
		pci_read_config_dword(tmp_adev->pdev, PCI_COMMAND, &status);
6013 | if (PCI_POSSIBLE_ERROR(status)) { |
6014 | dev_err(tmp_adev->dev, "device lost from bus!"); |
6015 | ret = -ENODEV; |
6016 | } |
6017 | } |
6018 | |
6019 | return ret; |
6020 | } |
6021 | |
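/**
 * amdgpu_device_halt_activities - stop all activity before a GPU reset
 *
 * @adev: amdgpu_device pointer
 * @job: the job that triggered the hang, if any
 * @reset_context: amdgpu reset context pointer
 * @device_list: list to be filled with the devices to reset
 * @hive: XGMI hive the device belongs to, if any
 * @need_emergency_restart: true if an emergency restart is required
 *
 * Builds the list of devices to reset, locks the reset domain, suspends
 * display audio and stops the schedulers of every affected device.
 * Returns 0 on success, negative error code on failure.
 */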
6022 | static int amdgpu_device_halt_activities(struct amdgpu_device *adev, |
6023 | struct amdgpu_job *job, |
6024 | struct amdgpu_reset_context *reset_context, |
6025 | struct list_head *device_list, |
6026 | struct amdgpu_hive_info *hive, |
6027 | bool need_emergency_restart) |
6028 | { |
6029 | struct list_head *device_list_handle = NULL; |
6030 | struct amdgpu_device *tmp_adev = NULL; |
6031 | int i, r = 0; |
6032 | |
6033 | /* |
6034 | * Build list of devices to reset. |
	 * In case we are in XGMI hive mode, re-sort the device list
	 * to put adev in the first position.
6037 | */ |
6038 | if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > 1) && hive) { |
6039 | list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { |
6040 | list_add_tail(new: &tmp_adev->reset_list, head: device_list); |
6041 | if (adev->shutdown) |
6042 | tmp_adev->shutdown = true; |
6043 | if (adev->pcie_reset_ctx.occurs_dpc) |
6044 | tmp_adev->pcie_reset_ctx.in_link_reset = true; |
6045 | } |
6046 | if (!list_is_first(list: &adev->reset_list, head: device_list)) |
6047 | list_rotate_to_front(list: &adev->reset_list, head: device_list); |
6048 | device_list_handle = device_list; |
6049 | } else { |
6050 | list_add_tail(new: &adev->reset_list, head: device_list); |
6051 | device_list_handle = device_list; |
6052 | } |
6053 | |
6054 | if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) { |
6055 | r = amdgpu_device_health_check(device_list_handle); |
6056 | if (r) |
6057 | return r; |
6058 | } |
6059 | |
6060 | /* We need to lock reset domain only once both for XGMI and single device */ |
6061 | tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device, |
6062 | reset_list); |
6063 | amdgpu_device_lock_reset_domain(reset_domain: tmp_adev->reset_domain); |
6064 | |
6065 | /* block all schedulers and reset given job's ring */ |
6066 | list_for_each_entry(tmp_adev, device_list_handle, reset_list) { |
6067 | |
6068 | amdgpu_device_set_mp1_state(adev: tmp_adev); |
6069 | |
		/*
		 * Try to put the audio codec into the suspend state
		 * before the gpu reset starts.
		 *
		 * Because the power domain of the graphics device is
		 * shared with the AZ power domain, we may otherwise
		 * change the audio hardware behind the audio driver's
		 * back, which will trigger audio codec errors.
		 */
6080 | if (!amdgpu_device_suspend_display_audio(adev: tmp_adev)) |
6081 | tmp_adev->pcie_reset_ctx.audio_suspended = true; |
6082 | |
6083 | amdgpu_ras_set_error_query_ready(adev: tmp_adev, ready: false); |
6084 | |
6085 | cancel_delayed_work_sync(dwork: &tmp_adev->delayed_init_work); |
6086 | |
6087 | amdgpu_amdkfd_pre_reset(adev: tmp_adev, reset_context); |
6088 | |
		/*
		 * Mark these ASICs to be reset as untracked first,
		 * and add them back after the reset completes.
		 */
		amdgpu_unregister_gpu_instance(tmp_adev);
6094 | |
6095 | drm_client_dev_suspend(dev: adev_to_drm(adev: tmp_adev), holds_console_lock: false); |
6096 | |
6097 | /* disable ras on ALL IPs */ |
6098 | if (!need_emergency_restart && |
6099 | (!adev->pcie_reset_ctx.occurs_dpc) && |
6100 | amdgpu_device_ip_need_full_reset(adev: tmp_adev)) |
6101 | amdgpu_ras_suspend(adev: tmp_adev); |
6102 | |
6103 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
6104 | struct amdgpu_ring *ring = tmp_adev->rings[i]; |
6105 | |
6106 | if (!amdgpu_ring_sched_ready(ring)) |
6107 | continue; |
6108 | |
6109 | drm_sched_stop(sched: &ring->sched, bad: job ? &job->base : NULL); |
6110 | |
6111 | if (need_emergency_restart) |
6112 | amdgpu_job_stop_all_jobs_on_sched(sched: &ring->sched); |
6113 | } |
6114 | atomic_inc(v: &tmp_adev->gpu_reset_counter); |
6115 | } |
6116 | |
6117 | return r; |
6118 | } |
6119 | |
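/**
 * amdgpu_device_asic_reset - run the pre-reset steps and reset the ASICs
 *
 * @adev: amdgpu_device pointer
 * @device_list: list of devices to reset
 * @reset_context: amdgpu reset context pointer
 *
 * Runs the pre-asic-reset step on every device, then performs the reset
 * either through the host (SR-IOV) or directly on bare metal, retrying
 * where that is allowed.
 * Returns 0 on success, negative error code on failure.
 */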
6120 | static int amdgpu_device_asic_reset(struct amdgpu_device *adev, |
6121 | struct list_head *device_list, |
6122 | struct amdgpu_reset_context *reset_context) |
6123 | { |
6124 | struct amdgpu_device *tmp_adev = NULL; |
6125 | int retry_limit = AMDGPU_MAX_RETRY_LIMIT; |
6126 | int r = 0; |
6127 | |
6128 | retry: /* Rest of adevs pre asic reset from XGMI hive. */ |
6129 | list_for_each_entry(tmp_adev, device_list, reset_list) { |
6130 | if (adev->pcie_reset_ctx.occurs_dpc) |
6131 | tmp_adev->no_hw_access = true; |
6132 | r = amdgpu_device_pre_asic_reset(adev: tmp_adev, reset_context); |
6133 | if (adev->pcie_reset_ctx.occurs_dpc) |
6134 | tmp_adev->no_hw_access = false; |
6135 | /*TODO Should we stop ?*/ |
6136 | if (r) { |
6137 | dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ", |
6138 | r, adev_to_drm(tmp_adev)->unique); |
6139 | tmp_adev->asic_reset_res = r; |
6140 | } |
6141 | } |
6142 | |
	/* Actual ASIC resets if needed. */
	/* Host driver will handle XGMI hive reset for SRIOV */
6145 | if (amdgpu_sriov_vf(adev)) { |
6146 | |
6147 | /* Bail out of reset early */ |
6148 | if (amdgpu_ras_is_rma(adev)) |
6149 | return -ENODEV; |
6150 | |
6151 | if (amdgpu_ras_get_fed_status(adev) || amdgpu_virt_rcvd_ras_interrupt(adev)) { |
6152 | dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n"); |
6153 | amdgpu_ras_set_fed(adev, status: true); |
6154 | set_bit(nr: AMDGPU_HOST_FLR, addr: &reset_context->flags); |
6155 | } |
6156 | |
6157 | r = amdgpu_device_reset_sriov(adev, reset_context); |
6158 | if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > 0) { |
6159 | amdgpu_virt_release_full_gpu(adev, init: true); |
6160 | goto retry; |
6161 | } |
6162 | if (r) |
6163 | adev->asic_reset_res = r; |
6164 | } else { |
6165 | r = amdgpu_do_asic_reset(device_list_handle: device_list, reset_context); |
6166 | if (r && r == -EAGAIN) |
6167 | goto retry; |
6168 | } |
6169 | |
6170 | list_for_each_entry(tmp_adev, device_list, reset_list) { |
6171 | /* |
6172 | * Drop any pending non scheduler resets queued before reset is done. |
6173 | * Any reset scheduled after this point would be valid. Scheduler resets |
6174 | * were already dropped during drm_sched_stop and no new ones can come |
6175 | * in before drm_sched_start. |
6176 | */ |
6177 | amdgpu_device_stop_pending_resets(adev: tmp_adev); |
6178 | } |
6179 | |
6180 | return r; |
6181 | } |
6182 | |
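/**
 * amdgpu_device_sched_resume - restart the schedulers after a reset
 *
 * @device_list: list of devices that were reset
 * @reset_context: amdgpu reset context pointer
 * @job_signaled: true if the guilty job had already signaled
 *
 * Restarts the DRM schedulers of all rings and reports the result of
 * the reset for every device in the list.
 * Returns 0 on success, negative error code on failure.
 */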
6183 | static int amdgpu_device_sched_resume(struct list_head *device_list, |
6184 | struct amdgpu_reset_context *reset_context, |
6185 | bool job_signaled) |
6186 | { |
6187 | struct amdgpu_device *tmp_adev = NULL; |
6188 | int i, r = 0; |
6189 | |
6190 | /* Post ASIC reset for all devs .*/ |
6191 | list_for_each_entry(tmp_adev, device_list, reset_list) { |
6192 | |
6193 | for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { |
6194 | struct amdgpu_ring *ring = tmp_adev->rings[i]; |
6195 | |
6196 | if (!amdgpu_ring_sched_ready(ring)) |
6197 | continue; |
6198 | |
6199 | drm_sched_start(sched: &ring->sched, errno: 0); |
6200 | } |
6201 | |
6202 | if (!drm_drv_uses_atomic_modeset(dev: adev_to_drm(adev: tmp_adev)) && !job_signaled) |
6203 | drm_helper_resume_force_mode(dev: adev_to_drm(adev: tmp_adev)); |
6204 | |
6205 | if (tmp_adev->asic_reset_res) |
6206 | r = tmp_adev->asic_reset_res; |
6207 | |
6208 | tmp_adev->asic_reset_res = 0; |
6209 | |
6210 | if (r) { |
			/* bad news, how do we tell userspace?
			 * For RAS errors, we should report the GPU bad status
			 * instead of a reset failure.
			 */
6215 | if (reset_context->src != AMDGPU_RESET_SRC_RAS || |
6216 | !amdgpu_ras_eeprom_check_err_threshold(adev: tmp_adev)) |
6217 | dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", |
6218 | atomic_read(&tmp_adev->gpu_reset_counter)); |
6219 | amdgpu_vf_error_put(adev: tmp_adev, sub_error_code: AMDGIM_ERROR_VF_GPU_RESET_FAIL, error_flags: 0, error_data: r); |
6220 | } else { |
6221 | dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter)); |
6222 | if (amdgpu_acpi_smart_shift_update(dev: adev_to_drm(adev: tmp_adev), ss_state: AMDGPU_SS_DEV_D0)) |
6223 | DRM_WARN("smart shift update failed\n"); |
6224 | } |
6225 | } |
6226 | |
6227 | return r; |
6228 | } |
6229 | |
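/**
 * amdgpu_device_gpu_resume - finish GPU recovery
 *
 * @adev: amdgpu_device pointer
 * @device_list: list of devices that were reset
 * @need_emergency_restart: true if an emergency restart was requested
 *
 * Brings kfd back up where needed, resumes display audio, restores the
 * MP1 state and unlocks the reset domain for every device in the list.
 */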
6230 | static void amdgpu_device_gpu_resume(struct amdgpu_device *adev, |
6231 | struct list_head *device_list, |
6232 | bool need_emergency_restart) |
6233 | { |
6234 | struct amdgpu_device *tmp_adev = NULL; |
6235 | |
6236 | list_for_each_entry(tmp_adev, device_list, reset_list) { |
6237 | /* unlock kfd: SRIOV would do it separately */ |
6238 | if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev)) |
6239 | amdgpu_amdkfd_post_reset(adev: tmp_adev); |
6240 | |
		/* kfd_post_reset will do nothing if the kfd device is not
		 * initialized, so bring up kfd here if it was not initialized
		 * before.
		 */
6244 | if (!adev->kfd.init_complete) |
6245 | amdgpu_amdkfd_device_init(adev); |
6246 | |
6247 | if (tmp_adev->pcie_reset_ctx.audio_suspended) |
6248 | amdgpu_device_resume_display_audio(adev: tmp_adev); |
6249 | |
6250 | amdgpu_device_unset_mp1_state(adev: tmp_adev); |
6251 | |
6252 | amdgpu_ras_set_error_query_ready(adev: tmp_adev, ready: true); |
6253 | |
6254 | } |
6255 | |
6256 | tmp_adev = list_first_entry(device_list, struct amdgpu_device, |
6257 | reset_list); |
6258 | amdgpu_device_unlock_reset_domain(reset_domain: tmp_adev->reset_domain); |
6259 | |
6260 | } |
6261 | |
6262 | |
6263 | /** |
6264 | * amdgpu_device_gpu_recover - reset the asic and recover scheduler |
6265 | * |
6266 | * @adev: amdgpu_device pointer |
6267 | * @job: which job trigger hang |
6268 | * @reset_context: amdgpu reset context pointer |
6269 | * |
6270 | * Attempt to reset the GPU if it has hung (all asics). |
6271 | * Attempt to do soft-reset or full-reset and reinitialize Asic |
6272 | * Returns 0 for success or an error on failure. |
6273 | */ |
6274 | |
6275 | int amdgpu_device_gpu_recover(struct amdgpu_device *adev, |
6276 | struct amdgpu_job *job, |
6277 | struct amdgpu_reset_context *reset_context) |
6278 | { |
6279 | struct list_head device_list; |
6280 | bool job_signaled = false; |
6281 | struct amdgpu_hive_info *hive = NULL; |
6282 | int r = 0; |
6283 | bool need_emergency_restart = false; |
6284 | |
6285 | /* |
6286 | * If it reaches here because of hang/timeout and a RAS error is |
6287 | * detected at the same time, let RAS recovery take care of it. |
6288 | */ |
6289 | if (amdgpu_ras_is_err_state(adev, block: AMDGPU_RAS_BLOCK__ANY) && |
6290 | !amdgpu_sriov_vf(adev) && |
6291 | reset_context->src != AMDGPU_RESET_SRC_RAS) { |
6292 | dev_dbg(adev->dev, |
6293 | "Gpu recovery from source: %d yielding to RAS error recovery handling", |
6294 | reset_context->src); |
6295 | return 0; |
6296 | } |
6297 | |
6298 | /* |
6299 | * Special case: RAS triggered and full reset isn't supported |
6300 | */ |
6301 | need_emergency_restart = amdgpu_ras_need_emergency_restart(adev); |
6302 | |
6303 | /* |
6304 | * Flush RAM to disk so that after reboot |
	 * the user can read the log and see why the system rebooted.
6306 | */ |
6307 | if (need_emergency_restart && amdgpu_ras_get_context(adev) && |
6308 | amdgpu_ras_get_context(adev)->reboot) { |
6309 | DRM_WARN("Emergency reboot."); |
6310 | |
6311 | ksys_sync_helper(); |
6312 | emergency_restart(); |
6313 | } |
6314 | |
6315 | dev_info(adev->dev, "GPU %s begin!\n", |
6316 | need_emergency_restart ? "jobs stop": "reset"); |
6317 | |
6318 | if (!amdgpu_sriov_vf(adev)) |
6319 | hive = amdgpu_get_xgmi_hive(adev); |
6320 | if (hive) |
6321 | mutex_lock(&hive->hive_lock); |
6322 | |
6323 | reset_context->job = job; |
6324 | reset_context->hive = hive; |
6325 | INIT_LIST_HEAD(list: &device_list); |
6326 | |
6327 | r = amdgpu_device_halt_activities(adev, job, reset_context, device_list: &device_list, |
6328 | hive, need_emergency_restart); |
6329 | if (r) |
6330 | goto end_reset; |
6331 | |
6332 | if (need_emergency_restart) |
6333 | goto skip_sched_resume; |
6334 | /* |
6335 | * Must check guilty signal here since after this point all old |
6336 | * HW fences are force signaled. |
6337 | * |
6338 | * job->base holds a reference to parent fence |
6339 | */ |
6340 | if (job && dma_fence_is_signaled(fence: &job->hw_fence)) { |
6341 | job_signaled = true; |
6342 | dev_info(adev->dev, "Guilty job already signaled, skipping HW reset"); |
6343 | goto skip_hw_reset; |
6344 | } |
6345 | |
6346 | r = amdgpu_device_asic_reset(adev, device_list: &device_list, reset_context); |
6347 | if (r) |
6348 | goto end_reset; |
6349 | skip_hw_reset: |
6350 | r = amdgpu_device_sched_resume(device_list: &device_list, reset_context, job_signaled); |
6351 | if (r) |
6352 | goto end_reset; |
6353 | skip_sched_resume: |
6354 | amdgpu_device_gpu_resume(adev, device_list: &device_list, need_emergency_restart); |
6355 | end_reset: |
6356 | if (hive) { |
6357 | mutex_unlock(lock: &hive->hive_lock); |
6358 | amdgpu_put_xgmi_hive(hive); |
6359 | } |
6360 | |
6361 | if (r) |
6362 | dev_info(adev->dev, "GPU reset end with ret = %d\n", r); |
6363 | |
6364 | atomic_set(v: &adev->reset_domain->reset_res, i: r); |
6365 | |
6366 | if (!r) |
6367 | drm_dev_wedged_event(dev: adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE); |
6368 | |
6369 | return r; |
6370 | } |
6371 | |
6372 | /** |
6373 | * amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner |
6374 | * |
6375 | * @adev: amdgpu_device pointer |
6376 | * @speed: pointer to the speed of the link |
6377 | * @width: pointer to the width of the link |
6378 | * |
6379 | * Evaluate the hierarchy to find the speed and bandwidth capabilities of the |
6380 | * first physical partner to an AMD dGPU. |
6381 | * This will exclude any virtual switches and links. |
6382 | */ |
6383 | static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev, |
6384 | enum pci_bus_speed *speed, |
6385 | enum pcie_link_width *width) |
6386 | { |
6387 | struct pci_dev *parent = adev->pdev; |
6388 | |
6389 | if (!speed || !width) |
6390 | return; |
6391 | |
6392 | *speed = PCI_SPEED_UNKNOWN; |
6393 | *width = PCIE_LNK_WIDTH_UNKNOWN; |
6394 | |
6395 | if (amdgpu_device_pcie_dynamic_switching_supported(adev)) { |
6396 | while ((parent = pci_upstream_bridge(dev: parent))) { |
6397 | /* skip upstream/downstream switches internal to dGPU*/ |
6398 | if (parent->vendor == PCI_VENDOR_ID_ATI) |
6399 | continue; |
6400 | *speed = pcie_get_speed_cap(dev: parent); |
6401 | *width = pcie_get_width_cap(dev: parent); |
6402 | break; |
6403 | } |
6404 | } else { |
6405 | /* use the current speeds rather than max if switching is not supported */ |
6406 | pcie_bandwidth_available(dev: adev->pdev, NULL, speed, width); |
6407 | } |
6408 | } |
6409 | |
6410 | /** |
6411 | * amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU |
6412 | * |
6413 | * @adev: amdgpu_device pointer |
6414 | * @speed: pointer to the speed of the link |
6415 | * @width: pointer to the width of the link |
6416 | * |
6417 | * Evaluate the hierarchy to find the speed and bandwidth capabilities of the |
6418 | * AMD dGPU which may be a virtual upstream bridge. |
6419 | */ |
6420 | static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev, |
6421 | enum pci_bus_speed *speed, |
6422 | enum pcie_link_width *width) |
6423 | { |
6424 | struct pci_dev *parent = adev->pdev; |
6425 | |
6426 | if (!speed || !width) |
6427 | return; |
6428 | |
6429 | parent = pci_upstream_bridge(dev: parent); |
6430 | if (parent && parent->vendor == PCI_VENDOR_ID_ATI) { |
6431 | /* use the upstream/downstream switches internal to dGPU */ |
6432 | *speed = pcie_get_speed_cap(dev: parent); |
6433 | *width = pcie_get_width_cap(dev: parent); |
6434 | while ((parent = pci_upstream_bridge(dev: parent))) { |
6435 | if (parent->vendor == PCI_VENDOR_ID_ATI) { |
6436 | /* use the upstream/downstream switches internal to dGPU */ |
6437 | *speed = pcie_get_speed_cap(dev: parent); |
6438 | *width = pcie_get_width_cap(dev: parent); |
6439 | } |
6440 | } |
6441 | } else { |
6442 | /* use the device itself */ |
6443 | *speed = pcie_get_speed_cap(dev: adev->pdev); |
6444 | *width = pcie_get_width_cap(dev: adev->pdev); |
6445 | } |
6446 | } |
6447 | |
6448 | /** |
6449 | * amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot |
6450 | * |
6451 | * @adev: amdgpu_device pointer |
6452 | * |
6453 | * Fetches and stores in the driver the PCIE capabilities (gen speed |
6454 | * and lanes) of the slot the device is in. Handles APUs and |
6455 | * virtualized environments where PCIE config space may not be available. |
6456 | */ |
6457 | static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev) |
6458 | { |
6459 | enum pci_bus_speed speed_cap, platform_speed_cap; |
6460 | enum pcie_link_width platform_link_width, link_width; |
6461 | |
6462 | if (amdgpu_pcie_gen_cap) |
6463 | adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap; |
6464 | |
6465 | if (amdgpu_pcie_lane_cap) |
6466 | adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap; |
6467 | |
6468 | /* covers APUs as well */ |
6469 | if (pci_is_root_bus(adev->pdev->bus) && !amdgpu_passthrough(adev)) { |
6470 | if (adev->pm.pcie_gen_mask == 0) |
6471 | adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK; |
6472 | if (adev->pm.pcie_mlw_mask == 0) |
6473 | adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK; |
6474 | return; |
6475 | } |
6476 | |
6477 | if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask) |
6478 | return; |
6479 | |
6480 | amdgpu_device_partner_bandwidth(adev, &platform_speed_cap, |
6481 | &platform_link_width); |
6482 | amdgpu_device_gpu_bandwidth(adev, &speed_cap, &link_width); |
6483 | |
6484 | if (adev->pm.pcie_gen_mask == 0) { |
6485 | /* asic caps */ |
6486 | if (speed_cap == PCI_SPEED_UNKNOWN) { |
6487 | adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | |
6488 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | |
6489 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); |
6490 | } else { |
6491 | if (speed_cap == PCIE_SPEED_32_0GT) |
6492 | adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | |
6493 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | |
6494 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | |
6495 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 | |
6496 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5); |
6497 | else if (speed_cap == PCIE_SPEED_16_0GT) |
6498 | adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | |
6499 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | |
6500 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 | |
6501 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4); |
6502 | else if (speed_cap == PCIE_SPEED_8_0GT) |
6503 | adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | |
6504 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 | |
6505 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3); |
6506 | else if (speed_cap == PCIE_SPEED_5_0GT) |
6507 | adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 | |
6508 | CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2); |
6509 | else |
6510 | adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1; |
6511 | } |
6512 | /* platform caps */ |
6513 | if (platform_speed_cap == PCI_SPEED_UNKNOWN) { |
6514 | adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | |
6515 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); |
6516 | } else { |
6517 | if (platform_speed_cap == PCIE_SPEED_32_0GT) |
6518 | adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | |
6519 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | |
6520 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | |
6521 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 | |
6522 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5); |
6523 | else if (platform_speed_cap == PCIE_SPEED_16_0GT) |
6524 | adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | |
6525 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | |
6526 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 | |
6527 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4); |
6528 | else if (platform_speed_cap == PCIE_SPEED_8_0GT) |
6529 | adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | |
6530 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 | |
6531 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3); |
6532 | else if (platform_speed_cap == PCIE_SPEED_5_0GT) |
6533 | adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 | |
6534 | CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2); |
6535 | else |
6536 | adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1; |
6537 | |
6538 | } |
6539 | } |
6540 | if (adev->pm.pcie_mlw_mask == 0) { |
6541 | /* asic caps */ |
6542 | if (link_width == PCIE_LNK_WIDTH_UNKNOWN) { |
6543 | adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK; |
6544 | } else { |
6545 | switch (link_width) { |
6546 | case PCIE_LNK_X32: |
6547 | adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 | |
6548 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 | |
6549 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | |
6550 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | |
6551 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | |
6552 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6553 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); |
6554 | break; |
6555 | case PCIE_LNK_X16: |
6556 | adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 | |
6557 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | |
6558 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | |
6559 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | |
6560 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6561 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); |
6562 | break; |
6563 | case PCIE_LNK_X12: |
6564 | adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 | |
6565 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | |
6566 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | |
6567 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6568 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); |
6569 | break; |
6570 | case PCIE_LNK_X8: |
6571 | adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 | |
6572 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | |
6573 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6574 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); |
6575 | break; |
6576 | case PCIE_LNK_X4: |
6577 | adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 | |
6578 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6579 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); |
6580 | break; |
6581 | case PCIE_LNK_X2: |
6582 | adev->pm.pcie_mlw_mask |= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6583 | CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1); |
6584 | break; |
6585 | case PCIE_LNK_X1: |
6586 | adev->pm.pcie_mlw_mask |= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1; |
6587 | break; |
6588 | default: |
6589 | break; |
6590 | } |
6591 | } |
6592 | /* platform caps */ |
6593 | if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) { |
6594 | adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK; |
6595 | } else { |
6596 | switch (platform_link_width) { |
6597 | case PCIE_LNK_X32: |
6598 | adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 | |
6599 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | |
6600 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | |
6601 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | |
6602 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | |
6603 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6604 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); |
6605 | break; |
6606 | case PCIE_LNK_X16: |
6607 | adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 | |
6608 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | |
6609 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | |
6610 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | |
6611 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6612 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); |
6613 | break; |
6614 | case PCIE_LNK_X12: |
6615 | adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 | |
6616 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | |
6617 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | |
6618 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6619 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); |
6620 | break; |
6621 | case PCIE_LNK_X8: |
6622 | adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 | |
6623 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | |
6624 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6625 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); |
6626 | break; |
6627 | case PCIE_LNK_X4: |
6628 | adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 | |
6629 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6630 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); |
6631 | break; |
6632 | case PCIE_LNK_X2: |
6633 | adev->pm.pcie_mlw_mask |= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 | |
6634 | CAIL_PCIE_LINK_WIDTH_SUPPORT_X1); |
6635 | break; |
6636 | case PCIE_LNK_X1: |
6637 | adev->pm.pcie_mlw_mask |= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1; |
6638 | break; |
6639 | default: |
6640 | break; |
6641 | } |
6642 | } |
6643 | } |
6644 | } |
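/*
 * Illustrative sketch only: the masks assembled above are cumulative (a GEN4
 * platform also sets GEN1..GEN3), so the highest supported gen can be read
 * back by testing from the top down. The helper name is hypothetical.
 */
static int __maybe_unused amdgpu_example_max_platform_gen(struct amdgpu_device *adev)
{
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5)
		return 5;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
		return 4;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
		return 3;
	if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
		return 2;
	return 1;
}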
6645 | |
6646 | /** |
6647 | * amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR |
6648 | * |
6649 | * @adev: amdgpu_device pointer |
6650 | * @peer_adev: amdgpu_device pointer for peer device trying to access @adev |
6651 | * |
6652 | * Return true if @peer_adev can access (DMA) @adev through the PCIe |
6653 | * BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of |
6654 | * @peer_adev. |
6655 | */ |
6656 | bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev, |
6657 | struct amdgpu_device *peer_adev) |
6658 | { |
6659 | #ifdef CONFIG_HSA_AMD_P2P |
6660 | bool p2p_access = |
6661 | !adev->gmc.xgmi.connected_to_cpu && |
6662 | !(pci_p2pdma_distance(adev->pdev, peer_adev->dev, false) < 0); |
6663 | if (!p2p_access) |
6664 | dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n", |
6665 | pci_name(peer_adev->pdev)); |
6666 | |
6667 | bool is_large_bar = adev->gmc.visible_vram_size && |
6668 | adev->gmc.real_vram_size == adev->gmc.visible_vram_size; |
6669 | bool p2p_addressable = amdgpu_device_check_iommu_remap(peer_adev); |
6670 | |
6671 | if (!p2p_addressable) { |
6672 | uint64_t address_mask = peer_adev->dev->dma_mask ? |
6673 | ~*peer_adev->dev->dma_mask : ~((1ULL << 32) - 1); |
6674 | resource_size_t aper_limit = |
6675 | adev->gmc.aper_base + adev->gmc.aper_size - 1; |
6676 | |
6677 | p2p_addressable = !(adev->gmc.aper_base & address_mask || |
6678 | aper_limit & address_mask); |
6679 | } |
6680 | return pcie_p2p && is_large_bar && p2p_access && p2p_addressable; |
6681 | #else |
6682 | return false; |
6683 | #endif |
6684 | } |
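/*
 * Worked sketch of the addressability check above, factored out with made-up
 * numbers: a peer limited to 40-bit DMA can only reach a BAR whose base and
 * limit both clear the address bits above bit 39. The helper is hypothetical.
 */
static bool __maybe_unused amdgpu_example_bar_addressable(u64 aper_base,
							  u64 aper_size,
							  u64 peer_dma_mask)
{
	u64 address_mask = ~peer_dma_mask;	/* bits the peer cannot drive */
	u64 aper_limit = aper_base + aper_size - 1;

	return !((aper_base & address_mask) || (aper_limit & address_mask));
}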
6685 | |
6686 | int amdgpu_device_baco_enter(struct drm_device *dev) |
6687 | { |
6688 | struct amdgpu_device *adev = drm_to_adev(dev); |
6689 | struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); |
6690 | |
6691 | if (!amdgpu_device_supports_baco(dev)) |
6692 | return -ENOTSUPP; |
6693 | |
6694 | if (ras && adev->ras_enabled && |
6695 | adev->nbio.funcs->enable_doorbell_interrupt) |
6696 | adev->nbio.funcs->enable_doorbell_interrupt(adev, false); |
6697 | |
6698 | return amdgpu_dpm_baco_enter(adev); |
6699 | } |
6700 | |
6701 | int amdgpu_device_baco_exit(struct drm_device *dev) |
6702 | { |
6703 | struct amdgpu_device *adev = drm_to_adev(dev); |
6704 | struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); |
6705 | int ret = 0; |
6706 | |
6707 | if (!amdgpu_device_supports_baco(dev)) |
6708 | return -ENOTSUPP; |
6709 | |
6710 | ret = amdgpu_dpm_baco_exit(adev); |
6711 | if (ret) |
6712 | return ret; |
6713 | |
6714 | if (ras && adev->ras_enabled && |
6715 | adev->nbio.funcs->enable_doorbell_interrupt) |
6716 | adev->nbio.funcs->enable_doorbell_interrupt(adev, true); |
6717 | |
6718 | if (amdgpu_passthrough(adev) && adev->nbio.funcs && |
6719 | adev->nbio.funcs->clear_doorbell_interrupt) |
6720 | adev->nbio.funcs->clear_doorbell_interrupt(adev); |
6721 | |
6722 | return 0; |
6723 | } |
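/*
 * Minimal usage sketch (assumption: the platform reports BACO support and no
 * reset is in flight): the two helpers above are meant to be paired, and a
 * failed enter must not be followed by an exit.
 */
static int __maybe_unused amdgpu_example_baco_cycle(struct drm_device *dev)
{
	int r;

	r = amdgpu_device_baco_enter(dev);
	if (r)
		return r;	/* the device never entered BACO */

	/* ... device stays in BACO until something needs it again ... */

	return amdgpu_device_baco_exit(dev);
}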
6724 | |
6725 | /** |
6726 | * amdgpu_pci_error_detected - Called when a PCI error is detected. |
6727 | * @pdev: PCI device struct |
6728 | * @state: PCI channel state |
6729 | * |
6730 | * Description: Called when a PCI error is detected. |
6731 | * |
6732 | * Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT. |
6733 | */ |
6734 | pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state) |
6735 | { |
6736 | struct drm_device *dev = pci_get_drvdata(pdev); |
6737 | struct amdgpu_device *adev = drm_to_adev(dev); |
6738 | struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); |
6739 | struct amdgpu_reset_context reset_context; |
6740 | struct list_head device_list; |
6741 | int r = 0; |
6742 | |
6743 | dev_info(adev->dev, "PCI error: detected callback!!\n"); |
6744 | |
6745 | if (!amdgpu_dpm_is_link_reset_supported(adev)) { |
6746 | dev_warn(adev->dev, "No support for XGMI hive yet...\n"); |
6747 | return PCI_ERS_RESULT_DISCONNECT; |
6748 | } |
6749 | |
6750 | adev->pci_channel_state = state; |
6751 | |
6752 | switch (state) { |
6753 | case pci_channel_io_normal: |
6754 | dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state); |
6755 | return PCI_ERS_RESULT_CAN_RECOVER; |
6756 | case pci_channel_io_frozen: |
6757 | /* Fatal error, prepare for slot reset */ |
6758 | dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state); |
6759 | |
6760 | if (hive) |
6761 | mutex_lock(&hive->hive_lock); |
6762 | adev->pcie_reset_ctx.occurs_dpc = true; |
6763 | memset(&reset_context, 0, sizeof(reset_context)); |
6764 | INIT_LIST_HEAD(&device_list); |
6765 | |
6766 | r = amdgpu_device_halt_activities(adev, NULL, &reset_context, &device_list, |
6767 | hive, false); |
6768 | if (hive) { |
6769 | mutex_unlock(&hive->hive_lock); |
6770 | amdgpu_put_xgmi_hive(hive); |
6771 | } |
6772 | if (r) |
6773 | return PCI_ERS_RESULT_DISCONNECT; |
6774 | return PCI_ERS_RESULT_NEED_RESET; |
6775 | case pci_channel_io_perm_failure: |
6776 | /* Permanent error, prepare for device removal */ |
6777 | dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state); |
6778 | return PCI_ERS_RESULT_DISCONNECT; |
6779 | } |
6780 | |
6781 | return PCI_ERS_RESULT_NEED_RESET; |
6782 | } |
6783 | |
6784 | /** |
6785 | * amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers |
6786 | * @pdev: pointer to PCI device |
6787 | */ |
6788 | pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev) |
6789 | { |
6790 | struct drm_device *dev = pci_get_drvdata(pdev); |
6791 | struct amdgpu_device *adev = drm_to_adev(dev); |
6792 | |
6793 | dev_info(adev->dev, "PCI error: mmio enabled callback!!\n"); |
6794 | |
6795 | /* TODO - dump whatever for debugging purposes */ |
6796 | |
6797 | /* This is called only if amdgpu_pci_error_detected() returns |
6798 | * PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still |
6799 | * works, no need to reset slot. |
6800 | */ |
6801 | |
6802 | return PCI_ERS_RESULT_RECOVERED; |
6803 | } |
6804 | |
6805 | /** |
6806 | * amdgpu_pci_slot_reset - Called when PCI slot has been reset. |
6807 | * @pdev: PCI device struct |
6808 | * |
6809 | * Description: This routine is called by the pci error recovery |
6810 | * code after the PCI slot has been reset, just before we |
6811 | * should resume normal operations. |
6812 | */ |
6813 | pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev) |
6814 | { |
6815 | struct drm_device *dev = pci_get_drvdata(pdev); |
6816 | struct amdgpu_device *adev = drm_to_adev(dev); |
6817 | struct amdgpu_reset_context reset_context; |
6818 | struct amdgpu_device *tmp_adev; |
6819 | struct amdgpu_hive_info *hive; |
6820 | struct list_head device_list; |
6821 | int r = 0, i; |
6822 | u32 memsize; |
6823 | |
6824 | /* PCI error slot reset should be skipped during RAS recovery */ |
6825 | if ((amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 3) || |
6826 | amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 4)) && |
6827 | amdgpu_ras_in_recovery(adev)) |
6828 | return PCI_ERS_RESULT_RECOVERED; |
6829 | |
6830 | dev_info(adev->dev, "PCI error: slot reset callback!!\n"); |
6831 | |
6832 | memset(&reset_context, 0, sizeof(reset_context)); |
6833 | |
6834 | /* wait for asic to come out of reset */ |
6835 | msleep(700); |
6836 | |
6837 | /* Restore PCI confspace */ |
6838 | amdgpu_device_load_pci_state(pdev); |
6839 | |
6840 | /* confirm ASIC came out of reset */ |
6841 | for (i = 0; i < adev->usec_timeout; i++) { |
6842 | memsize = amdgpu_asic_get_config_memsize(adev); |
6843 | |
6844 | if (memsize != 0xffffffff) |
6845 | break; |
6846 | udelay(1); |
6847 | } |
6848 | if (memsize == 0xffffffff) { |
6849 | r = -ETIME; |
6850 | goto out; |
6851 | } |
6852 | |
6853 | reset_context.method = AMD_RESET_METHOD_NONE; |
6854 | reset_context.reset_req_dev = adev; |
6855 | set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags); |
6856 | set_bit(AMDGPU_SKIP_COREDUMP, &reset_context.flags); |
6857 | INIT_LIST_HEAD(&device_list); |
6858 | |
6859 | hive = amdgpu_get_xgmi_hive(adev); |
6860 | if (hive) { |
6861 | mutex_lock(&hive->hive_lock); |
6862 | reset_context.hive = hive; |
6863 | list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { |
6864 | tmp_adev->pcie_reset_ctx.in_link_reset = true; |
6865 | list_add_tail(&tmp_adev->reset_list, &device_list); |
6866 | } |
6867 | } else { |
6868 | set_bit(AMDGPU_SKIP_HW_RESET, &reset_context.flags); |
6869 | list_add_tail(&adev->reset_list, &device_list); |
6870 | } |
6871 | |
6872 | r = amdgpu_device_asic_reset(adev, &device_list, &reset_context); |
6873 | out: |
6874 | if (!r) { |
6875 | if (amdgpu_device_cache_pci_state(adev->pdev)) |
6876 | pci_restore_state(adev->pdev); |
6877 | dev_info(adev->dev, "PCIe error recovery succeeded\n"); |
6878 | } else { |
6879 | dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r); |
6880 | if (hive) { |
6881 | list_for_each_entry(tmp_adev, &device_list, reset_list) |
6882 | amdgpu_device_unset_mp1_state(tmp_adev); |
6883 | amdgpu_device_unlock_reset_domain(adev->reset_domain); |
6884 | } |
6885 | } |
6886 | |
6887 | if (hive) { |
6888 | mutex_unlock(&hive->hive_lock); |
6889 | amdgpu_put_xgmi_hive(hive); |
6890 | } |
6891 | |
6892 | return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; |
6893 | } |
6894 | |
6895 | /** |
6896 | * amdgpu_pci_resume() - resume normal ops after PCI reset |
6897 | * @pdev: pointer to PCI device |
6898 | * |
6899 | * Called when the error recovery driver tells us that it's |
6900 | * OK to resume normal operation. |
6901 | */ |
6902 | void amdgpu_pci_resume(struct pci_dev *pdev) |
6903 | { |
6904 | struct drm_device *dev = pci_get_drvdata(pdev); |
6905 | struct amdgpu_device *adev = drm_to_adev(dev); |
6906 | struct list_head device_list; |
6907 | struct amdgpu_hive_info *hive = NULL; |
6908 | struct amdgpu_device *tmp_adev = NULL; |
6909 | |
6910 | dev_info(adev->dev, "PCI error: resume callback!!\n"); |
6911 | |
6912 | /* Only continue execution for the case of pci_channel_io_frozen */ |
6913 | if (adev->pci_channel_state != pci_channel_io_frozen) |
6914 | return; |
6915 | |
6916 | INIT_LIST_HEAD(&device_list); |
6917 | |
6918 | hive = amdgpu_get_xgmi_hive(adev); |
6919 | if (hive) { |
6920 | mutex_lock(&hive->hive_lock); |
6921 | list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { |
6922 | tmp_adev->pcie_reset_ctx.in_link_reset = false; |
6923 | list_add_tail(&tmp_adev->reset_list, &device_list); |
6924 | } |
6925 | } else |
6926 | list_add_tail(&adev->reset_list, &device_list); |
6927 | |
6928 | amdgpu_device_sched_resume(&device_list, NULL, NULL); |
6929 | amdgpu_device_gpu_resume(adev, &device_list, false); |
6930 | adev->pcie_reset_ctx.occurs_dpc = false; |
6931 | |
6932 | if (hive) { |
6933 | mutex_unlock(&hive->hive_lock); |
6934 | amdgpu_put_xgmi_hive(hive); |
6935 | } |
6936 | } |
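/*
 * Sketch of how the four callbacks above are wired into PCI error recovery;
 * in the driver this table lives next to the pci_driver definition rather
 * than in this file.
 */
static const struct pci_error_handlers amdgpu_example_pci_err_handler __maybe_unused = {
	.error_detected	= amdgpu_pci_error_detected,
	.mmio_enabled	= amdgpu_pci_mmio_enabled,
	.slot_reset	= amdgpu_pci_slot_reset,
	.resume		= amdgpu_pci_resume,
};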
6937 | |
6938 | bool amdgpu_device_cache_pci_state(struct pci_dev *pdev) |
6939 | { |
6940 | struct drm_device *dev = pci_get_drvdata(pdev); |
6941 | struct amdgpu_device *adev = drm_to_adev(dev); |
6942 | int r; |
6943 | |
6944 | if (amdgpu_sriov_vf(adev)) |
6945 | return false; |
6946 | |
6947 | r = pci_save_state(pdev); |
6948 | if (!r) { |
6949 | kfree(adev->pci_state); |
6950 | |
6951 | adev->pci_state = pci_store_saved_state(pdev); |
6952 | |
6953 | if (!adev->pci_state) { |
6954 | DRM_ERROR("Failed to store PCI saved state"); |
6955 | return false; |
6956 | } |
6957 | } else { |
6958 | DRM_WARN("Failed to save PCI state, err:%d\n", r); |
6959 | return false; |
6960 | } |
6961 | |
6962 | return true; |
6963 | } |
6964 | |
6965 | bool amdgpu_device_load_pci_state(struct pci_dev *pdev) |
6966 | { |
6967 | struct drm_device *dev = pci_get_drvdata(pdev); |
6968 | struct amdgpu_device *adev = drm_to_adev(dev); |
6969 | int r; |
6970 | |
6971 | if (!adev->pci_state) |
6972 | return false; |
6973 | |
6974 | r = pci_load_saved_state(pdev, adev->pci_state); |
6975 | |
6976 | if (!r) { |
6977 | pci_restore_state(pdev); |
6978 | } else { |
6979 | DRM_WARN("Failed to load PCI state, err:%d\n", r); |
6980 | return false; |
6981 | } |
6982 | |
6983 | return true; |
6984 | } |
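/*
 * Usage sketch for the two helpers above (assumption: called from a reset
 * path that owns the device): config space is cached before the ASIC reset
 * wipes it and restored once the device is back on the bus.
 */
static void __maybe_unused amdgpu_example_pci_state_roundtrip(struct amdgpu_device *adev)
{
	if (!amdgpu_device_cache_pci_state(adev->pdev))
		return;

	/* ... ASIC reset happens here and clobbers PCI config space ... */

	amdgpu_device_load_pci_state(adev->pdev);
}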
6985 | |
6986 | void amdgpu_device_flush_hdp(struct amdgpu_device *adev, |
6987 | struct amdgpu_ring *ring) |
6988 | { |
6989 | #ifdef CONFIG_X86_64 |
6990 | if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) |
6991 | return; |
6992 | #endif |
6993 | if (adev->gmc.xgmi.connected_to_cpu) |
6994 | return; |
6995 | |
6996 | if (ring && ring->funcs->emit_hdp_flush) |
6997 | amdgpu_ring_emit_hdp_flush(ring); |
6998 | else |
6999 | amdgpu_asic_flush_hdp(adev, ring); |
7000 | } |
7001 | |
7002 | void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev, |
7003 | struct amdgpu_ring *ring) |
7004 | { |
7005 | #ifdef CONFIG_X86_64 |
7006 | if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) |
7007 | return; |
7008 | #endif |
7009 | if (adev->gmc.xgmi.connected_to_cpu) |
7010 | return; |
7011 | |
7012 | amdgpu_asic_invalidate_hdp(adev, ring); |
7013 | } |
7014 | |
7015 | int amdgpu_in_reset(struct amdgpu_device *adev) |
7016 | { |
7017 | return atomic_read(&adev->reset_domain->in_gpu_reset); |
7018 | } |
7019 | |
7020 | /** |
7021 | * amdgpu_device_halt() - bring hardware to some kind of halt state |
7022 | * |
7023 | * @adev: amdgpu_device pointer |
7024 | * |
7025 | * Bring hardware to some kind of halt state so that no one can touch it |
7026 | * any more. It helps to preserve the error context when an error occurs. |
7027 | * Compared to a simple hang, the system will stay stable at least for SSH |
7028 | * access. Then it should be trivial to inspect the hardware state and |
7029 | * see what's going on. Implemented as follows: |
7030 | * |
7031 | * 1. drm_dev_unplug() makes the device inaccessible to user space (IOCTLs, etc.), |
7032 | * clears all CPU mappings to the device, disallows remappings through page faults |
7033 | * 2. amdgpu_irq_disable_all() disables all interrupts |
7034 | * 3. amdgpu_fence_driver_hw_fini() signals all HW fences |
7035 | * 4. set adev->no_hw_access to avoid potential crashes after step 5 |
7036 | * 5. amdgpu_device_unmap_mmio() clears all MMIO mappings |
7037 | * 6. pci_disable_device() and pci_wait_for_pending_transaction() |
7038 | * flush any in flight DMA operations |
7039 | */ |
7040 | void amdgpu_device_halt(struct amdgpu_device *adev) |
7041 | { |
7042 | struct pci_dev *pdev = adev->pdev; |
7043 | struct drm_device *ddev = adev_to_drm(adev); |
7044 | |
7045 | amdgpu_xcp_dev_unplug(adev); |
7046 | drm_dev_unplug(ddev); |
7047 | |
7048 | amdgpu_irq_disable_all(adev); |
7049 | |
7050 | amdgpu_fence_driver_hw_fini(adev); |
7051 | |
7052 | adev->no_hw_access = true; |
7053 | |
7054 | amdgpu_device_unmap_mmio(adev); |
7055 | |
7056 | pci_disable_device(pdev); |
7057 | pci_wait_for_pending_transaction(pdev); |
7058 | } |
7059 | |
7060 | u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev, |
7061 | u32 reg) |
7062 | { |
7063 | unsigned long flags, address, data; |
7064 | u32 r; |
7065 | |
7066 | address = adev->nbio.funcs->get_pcie_port_index_offset(adev); |
7067 | data = adev->nbio.funcs->get_pcie_port_data_offset(adev); |
7068 | |
7069 | spin_lock_irqsave(&adev->pcie_idx_lock, flags); |
7070 | WREG32(address, reg * 4); |
7071 | (void)RREG32(address); |
7072 | r = RREG32(data); |
7073 | spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); |
7074 | return r; |
7075 | } |
7076 | |
7077 | void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev, |
7078 | u32 reg, u32 v) |
7079 | { |
7080 | unsigned long flags, address, data; |
7081 | |
7082 | address = adev->nbio.funcs->get_pcie_port_index_offset(adev); |
7083 | data = adev->nbio.funcs->get_pcie_port_data_offset(adev); |
7084 | |
7085 | spin_lock_irqsave(&adev->pcie_idx_lock, flags); |
7086 | WREG32(address, reg * 4); |
7087 | (void)RREG32(address); |
7088 | WREG32(data, v); |
7089 | (void)RREG32(data); |
7090 | spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); |
7091 | } |
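/*
 * Read-modify-write sketch built on the two indexed-port helpers above; the
 * register offset and bit masks are placeholders, and a real caller would
 * need its own locking if the whole sequence has to be atomic.
 */
static void __maybe_unused amdgpu_example_pcie_port_rmw(struct amdgpu_device *adev,
							u32 reg, u32 clear, u32 set)
{
	u32 v = amdgpu_device_pcie_port_rreg(adev, reg);

	v &= ~clear;
	v |= set;
	amdgpu_device_pcie_port_wreg(adev, reg, v);
}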
7092 | |
7093 | /** |
7094 | * amdgpu_device_get_gang - return a reference to the current gang |
7095 | * @adev: amdgpu_device pointer |
7096 | * |
7097 | * Returns: A new reference to the current gang leader. |
7098 | */ |
7099 | struct dma_fence *amdgpu_device_get_gang(struct amdgpu_device *adev) |
7100 | { |
7101 | struct dma_fence *fence; |
7102 | |
7103 | rcu_read_lock(); |
7104 | fence = dma_fence_get_rcu_safe(&adev->gang_submit); |
7105 | rcu_read_unlock(); |
7106 | return fence; |
7107 | } |
7108 | |
7109 | /** |
7110 | * amdgpu_device_switch_gang - switch to a new gang |
7111 | * @adev: amdgpu_device pointer |
7112 | * @gang: the gang to switch to |
7113 | * |
7114 | * Try to switch to a new gang. |
7115 | * Returns: NULL if we switched to the new gang or a reference to the current |
7116 | * gang leader. |
7117 | */ |
7118 | struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev, |
7119 | struct dma_fence *gang) |
7120 | { |
7121 | struct dma_fence *old = NULL; |
7122 | |
7123 | dma_fence_get(gang); |
7124 | do { |
7125 | dma_fence_put(old); |
7126 | old = amdgpu_device_get_gang(adev); |
7127 | if (old == gang) |
7128 | break; |
7129 | |
7130 | if (!dma_fence_is_signaled(old)) { |
7131 | dma_fence_put(gang); |
7132 | return old; |
7133 | } |
7134 | |
7135 | } while (cmpxchg((struct dma_fence __force **)&adev->gang_submit, |
7136 | old, gang) != old); |
7137 | |
7138 | /* |
7139 | * Drop it once for the exchanged reference in adev and once for the |
7140 | * thread local reference acquired in amdgpu_device_get_gang(). |
7141 | */ |
7142 | dma_fence_put(old); |
7143 | dma_fence_put(old); |
7144 | return NULL; |
7145 | } |
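/*
 * Caller-side sketch (hypothetical helper): amdgpu_device_switch_gang()
 * hands back the previous leader while it is still unsignaled, so a caller
 * typically waits on that fence and retries until NULL is returned.
 */
static long __maybe_unused amdgpu_example_switch_gang(struct amdgpu_device *adev,
						      struct dma_fence *gang)
{
	struct dma_fence *prev;
	long r = 0;

	while ((prev = amdgpu_device_switch_gang(adev, gang))) {
		r = dma_fence_wait(prev, true);
		dma_fence_put(prev);
		if (r)
			break;
	}
	return r;
}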
7146 | |
7147 | /** |
7148 | * amdgpu_device_enforce_isolation - enforce HW isolation |
7149 | * @adev: the amdgpu device pointer |
7150 | * @ring: the HW ring the job is supposed to run on |
7151 | * @job: the job which is about to be pushed to the HW ring |
7152 | * |
7153 | * Makes sure that only one client at a time can use the GFX block. |
7154 | * Returns: The dependency to wait on before the job can be pushed to the HW. |
7155 | * The function is called multiple times until NULL is returned. |
7156 | */ |
7157 | struct dma_fence *amdgpu_device_enforce_isolation(struct amdgpu_device *adev, |
7158 | struct amdgpu_ring *ring, |
7159 | struct amdgpu_job *job) |
7160 | { |
7161 | struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id]; |
7162 | struct drm_sched_fence *f = job->base.s_fence; |
7163 | struct dma_fence *dep; |
7164 | void *owner; |
7165 | int r; |
7166 | |
7167 | /* |
7168 | * For now enforce isolation only for the GFX block since we only need |
7169 | * the cleaner shader on those rings. |
7170 | */ |
7171 | if (ring->funcs->type != AMDGPU_RING_TYPE_GFX && |
7172 | ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) |
7173 | return NULL; |
7174 | |
7175 | /* |
7176 | * All submissions where enforce isolation is false are handled as if |
7177 | * they come from a single client. Use ~0l as the owner to distinguish it |
7178 | * from kernel submissions where the owner is NULL. |
7179 | */ |
7180 | owner = job->enforce_isolation ? f->owner : (void *)~0l; |
7181 | |
7182 | mutex_lock(&adev->enforce_isolation_mutex); |
7183 | |
7184 | /* |
7185 | * The "spearhead" submission is the first one which changes the |
7186 | * ownership to its client. We always need to wait for it to be |
7187 | * pushed to the HW before proceeding with anything. |
7188 | */ |
7189 | if (&f->scheduled != isolation->spearhead && |
7190 | !dma_fence_is_signaled(isolation->spearhead)) { |
7191 | dep = isolation->spearhead; |
7192 | goto out_grab_ref; |
7193 | } |
7194 | |
7195 | if (isolation->owner != owner) { |
7196 | |
7197 | /* |
7198 | * Wait for any gang to be assembled before switching to a |
7199 | * different owner or otherwise we could deadlock the |
7200 | * submissions. |
7201 | */ |
7202 | if (!job->gang_submit) { |
7203 | dep = amdgpu_device_get_gang(adev); |
7204 | if (!dma_fence_is_signaled(dep)) |
7205 | goto out_return_dep; |
7206 | dma_fence_put(dep); |
7207 | } |
7208 | |
7209 | dma_fence_put(isolation->spearhead); |
7210 | isolation->spearhead = dma_fence_get(&f->scheduled); |
7211 | amdgpu_sync_move(&isolation->active, &isolation->prev); |
7212 | trace_amdgpu_isolation(isolation->owner, owner); |
7213 | isolation->owner = owner; |
7214 | } |
7215 | |
7216 | /* |
7217 | * Specifying the ring here helps to pipeline submissions even when |
7218 | * isolation is enabled. If that is not desired for testing NULL can be |
7219 | * used instead of the ring to enforce a CPU round trip while switching |
7220 | * between clients. |
7221 | */ |
7222 | dep = amdgpu_sync_peek_fence(&isolation->prev, ring); |
7223 | r = amdgpu_sync_fence(&isolation->active, &f->finished, GFP_NOWAIT); |
7224 | if (r) |
7225 | DRM_WARN("OOM tracking isolation\n"); |
7226 | |
7227 | out_grab_ref: |
7228 | dma_fence_get(dep); |
7229 | out_return_dep: |
7230 | mutex_unlock(&adev->enforce_isolation_mutex); |
7231 | return dep; |
7232 | } |
7233 | |
7234 | bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev) |
7235 | { |
7236 | switch (adev->asic_type) { |
7237 | #ifdef CONFIG_DRM_AMDGPU_SI |
7238 | case CHIP_HAINAN: |
7239 | #endif |
7240 | case CHIP_TOPAZ: |
7241 | /* chips with no display hardware */ |
7242 | return false; |
7243 | #ifdef CONFIG_DRM_AMDGPU_SI |
7244 | case CHIP_TAHITI: |
7245 | case CHIP_PITCAIRN: |
7246 | case CHIP_VERDE: |
7247 | case CHIP_OLAND: |
7248 | #endif |
7249 | #ifdef CONFIG_DRM_AMDGPU_CIK |
7250 | case CHIP_BONAIRE: |
7251 | case CHIP_HAWAII: |
7252 | case CHIP_KAVERI: |
7253 | case CHIP_KABINI: |
7254 | case CHIP_MULLINS: |
7255 | #endif |
7256 | case CHIP_TONGA: |
7257 | case CHIP_FIJI: |
7258 | case CHIP_POLARIS10: |
7259 | case CHIP_POLARIS11: |
7260 | case CHIP_POLARIS12: |
7261 | case CHIP_VEGAM: |
7262 | case CHIP_CARRIZO: |
7263 | case CHIP_STONEY: |
7264 | /* chips with display hardware */ |
7265 | return true; |
7266 | default: |
7267 | /* IP discovery */ |
7268 | if (!amdgpu_ip_version(adev, DCE_HWIP, 0) || |
7269 | (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK)) |
7270 | return false; |
7271 | return true; |
7272 | } |
7273 | } |
7274 | |
7275 | uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev, |
7276 | uint32_t inst, uint32_t reg_addr, char reg_name[], |
7277 | uint32_t expected_value, uint32_t mask) |
7278 | { |
7279 | uint32_t ret = 0; |
7280 | uint32_t old_ = 0; |
7281 | uint32_t tmp_ = RREG32(reg_addr); |
7282 | uint32_t loop = adev->usec_timeout; |
7283 | |
7284 | while ((tmp_ & (mask)) != (expected_value)) { |
7285 | if (old_ != tmp_) { |
7286 | loop = adev->usec_timeout; |
7287 | old_ = tmp_; |
7288 | } else |
7289 | udelay(1); |
7290 | tmp_ = RREG32(reg_addr); |
7291 | loop--; |
7292 | if (!loop) { |
7293 | DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n", |
7294 | inst, reg_name, (uint32_t)expected_value, |
7295 | (uint32_t)(tmp_ & (mask))); |
7296 | ret = -ETIMEDOUT; |
7297 | break; |
7298 | } |
7299 | } |
7300 | return ret; |
7301 | } |
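/*
 * Usage sketch; the register offset, name, expected value and mask below are
 * placeholders. The helper returns 0 once (reg & mask) == expected_value and
 * -ETIMEDOUT when the value stops changing without reaching it within
 * adev->usec_timeout polls.
 */
static uint32_t __maybe_unused amdgpu_example_wait_idle(struct amdgpu_device *adev,
							uint32_t status_reg)
{
	/* wait for a hypothetical BUSY bit (bit 0) to clear */
	return amdgpu_device_wait_on_rreg(adev, 0, status_reg,
					  "EXAMPLE_STATUS", 0x0, 0x1);
}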
7302 | |
7303 | ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring) |
7304 | { |
7305 | ssize_t size = 0; |
7306 | |
7307 | if (!ring || !ring->adev) |
7308 | return size; |
7309 | |
7310 | if (amdgpu_device_should_recover_gpu(ring->adev)) |
7311 | size |= AMDGPU_RESET_TYPE_FULL; |
7312 | |
7313 | if (unlikely(!ring->adev->debug_disable_soft_recovery) && |
7314 | !amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery) |
7315 | size |= AMDGPU_RESET_TYPE_SOFT_RESET; |
7316 | |
7317 | return size; |
7318 | } |
7319 | |
7320 | ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset) |
7321 | { |
7322 | ssize_t size = 0; |
7323 | |
7324 | if (supported_reset == 0) { |
7325 | size += sysfs_emit_at(buf, size, "unsupported"); |
7326 | size += sysfs_emit_at(buf, size, "\n"); |
7327 | return size; |
7328 | |
7329 | } |
7330 | |
7331 | if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET) |
7332 | size += sysfs_emit_at(buf, size, "soft "); |
7333 | |
7334 | if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE) |
7335 | size += sysfs_emit_at(buf, size, "queue "); |
7336 | |
7337 | if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE) |
7338 | size += sysfs_emit_at(buf, size, "pipe "); |
7339 | |
7340 | if (supported_reset & AMDGPU_RESET_TYPE_FULL) |
7341 | size += sysfs_emit_at(buf, size, "full "); |
7342 | |
7343 | size += sysfs_emit_at(buf, size, "\n"); |
7344 | return size; |
7345 | } |
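/*
 * Sketch of a sysfs "show" callback built on the helper above; the attribute
 * and the adev->gfx.gfx_supported_reset field it reads are illustrative and
 * may not match this kernel tree.
 */
static ssize_t __maybe_unused amdgpu_example_reset_mask_show(struct device *dev,
							     struct device_attribute *attr,
							     char *buf)
{
	struct drm_device *ddev = dev_get_drvdata(dev);
	struct amdgpu_device *adev = drm_to_adev(ddev);

	return amdgpu_show_reset_mask(buf, adev->gfx.gfx_supported_reset);
}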
7346 |