amdgpu_device.c source code [linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c]

1	/*
2	* Copyright 2008 Advanced Micro Devices, Inc.
3	* Copyright 2008 Red Hat Inc.
4	* Copyright 2009 Jerome Glisse.
5	*
6	* Permission is hereby granted, free of charge, to any person obtaining a
7	* copy of this software and associated documentation files (the "Software"),
8	* to deal in the Software without restriction, including without limitation
9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
10	* and/or sell copies of the Software, and to permit persons to whom the
11	* Software is furnished to do so, subject to the following conditions:
12	*
13	* The above copyright notice and this permission notice shall be included in
14	* all copies or substantial portions of the Software.
15	*
16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19	* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
20	* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21	* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22	* OTHER DEALINGS IN THE SOFTWARE.
23	*
24	* Authors: Dave Airlie
25	* Alex Deucher
26	* Jerome Glisse
27	*/
28
29	#include <linux/aperture.h>
30	#include <linux/power_supply.h>
31	#include <linux/kthread.h>
32	#include <linux/module.h>
33	#include <linux/console.h>
34	#include <linux/slab.h>
35	#include <linux/iommu.h>
36	#include <linux/pci.h>
37	#include <linux/pci-p2pdma.h>
38	#include <linux/apple-gmux.h>
39
40	#include <drm/drm_atomic_helper.h>
41	#include <drm/drm_client_event.h>
42	#include <drm/drm_crtc_helper.h>
43	#include <drm/drm_probe_helper.h>
44	#include <drm/amdgpu_drm.h>
45	#include <linux/device.h>
46	#include <linux/vgaarb.h>
47	#include <linux/vga_switcheroo.h>
48	#include <linux/efi.h>
49	#include "amdgpu.h"
50	#include "amdgpu_trace.h"
51	#include "amdgpu_i2c.h"
52	#include "atom.h"
53	#include "amdgpu_atombios.h"
54	#include "amdgpu_atomfirmware.h"
55	#include "amd_pcie.h"
56	#ifdef CONFIG_DRM_AMDGPU_SI
57	#include "si.h"
58	#endif
59	#ifdef CONFIG_DRM_AMDGPU_CIK
60	#include "cik.h"
61	#endif
62	#include "vi.h"
63	#include "soc15.h"
64	#include "nv.h"
65	#include "bif/bif_4_1_d.h"
66	#include <linux/firmware.h>
67	#include "amdgpu_vf_error.h"
68
69	#include "amdgpu_amdkfd.h"
70	#include "amdgpu_pm.h"
71
72	#include "amdgpu_xgmi.h"
73	#include "amdgpu_ras.h"
74	#include "amdgpu_pmu.h"
75	#include "amdgpu_fru_eeprom.h"
76	#include "amdgpu_reset.h"
77	#include "amdgpu_virt.h"
78	#include "amdgpu_dev_coredump.h"
79
80	#include <linux/suspend.h>
81	#include <drm/task_barrier.h>
82	#include <linux/pm_runtime.h>
83
84	#include <drm/drm_drv.h>
85
86	#if IS_ENABLED(CONFIG_X86)
87	#include <asm/intel-family.h>
88	#include <asm/cpu_device_id.h>
89	#endif
90
91	MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
92	MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
93	MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
94	MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
95	MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
96	MODULE_FIRMWARE("amdgpu/arcturus_gpu_info.bin");
97	MODULE_FIRMWARE("amdgpu/navi12_gpu_info.bin");
98
#define AMDGPU_RESUME_MS		2000
#define AMDGPU_MAX_RETRY_LIMIT		2
/* True when error code @r is one of -EBUSY, -ETIMEDOUT or -EINVAL. */
#define AMDGPU_RETRY_SRIOV_RESET(r) ((r) == -EBUSY || (r) == -ETIMEDOUT || (r) == -EINVAL)
/*
 * Dword offsets (byte offset >> 2) of the PCIE index/data register pair,
 * used by the indirect accessors when adev->nbio.funcs is not set.
 */
#define AMDGPU_PCIE_INDEX_FALLBACK (0x38 >> 2)
#define AMDGPU_PCIE_INDEX_HI_FALLBACK (0x44 >> 2)
#define AMDGPU_PCIE_DATA_FALLBACK (0x3C >> 2)

#define AMDGPU_VBIOS_SKIP (1U << 0)
#define AMDGPU_VBIOS_OPTIONAL (1U << 1)
108
109	static const struct drm_driver amdgpu_kms_driver;
110
/*
 * Human readable ASIC names, terminated by a "LAST" sentinel entry.
 * NOTE(review): presumably indexed by the asic type enum (CHIP_*) - the
 * order must then match that enum; confirm against its definition.
 */
const char *amdgpu_asic_name[] = {
	"TAHITI",
	"PITCAIRN",
	"VERDE",
	"OLAND",
	"HAINAN",
	"BONAIRE",
	"KAVERI",
	"KABINI",
	"HAWAII",
	"MULLINS",
	"TOPAZ",
	"TONGA",
	"FIJI",
	"CARRIZO",
	"STONEY",
	"POLARIS10",
	"POLARIS11",
	"POLARIS12",
	"VEGAM",
	"VEGA10",
	"VEGA12",
	"VEGA20",
	"RAVEN",
	"ARCTURUS",
	"RENOIR",
	"ALDEBARAN",
	"NAVI10",
	"CYAN_SKILLFISH",
	"NAVI14",
	"NAVI12",
	"SIENNA_CICHLID",
	"NAVY_FLOUNDER",
	"VANGOGH",
	"DIMGREY_CAVEFISH",
	"BEIGE_GOBY",
	"YELLOW_CARP",
	"IP DISCOVERY",
	"LAST",
};
151
152	#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMD_IP_BLOCK_TYPE_NUM - 1, 0)
153	/*
154	* Default init level where all blocks are expected to be initialized. This is
155	* the level of initialization expected by default and also after a full reset
156	* of the device.
157	*/
158	struct amdgpu_init_level amdgpu_init_default = {
159	.level = AMDGPU_INIT_LEVEL_DEFAULT,
160	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
161	};
162
163	struct amdgpu_init_level amdgpu_init_recovery = {
164	.level = AMDGPU_INIT_LEVEL_RESET_RECOVERY,
165	.hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL,
166	};
167
168	/*
169	* Minimal blocks needed to be initialized before a XGMI hive can be reset. This
170	* is used for cases like reset on initialization where the entire hive needs to
171	* be reset before first use.
172	*/
173	struct amdgpu_init_level amdgpu_init_minimal_xgmi = {
174	.level = AMDGPU_INIT_LEVEL_MINIMAL_XGMI,
175	.hwini_ip_block_mask =
176	BIT(AMD_IP_BLOCK_TYPE_GMC) \| BIT(AMD_IP_BLOCK_TYPE_SMC) \|
177	BIT(AMD_IP_BLOCK_TYPE_COMMON) \| BIT(AMD_IP_BLOCK_TYPE_IH) \|
178	BIT(AMD_IP_BLOCK_TYPE_PSP)
179	};
180
181	static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev,
182	enum amd_ip_block_type block)
183	{
184	return (adev->init_lvl->hwini_ip_block_mask & (`1U` << block)) != `0`;
185	}
186
187	void amdgpu_set_init_level(struct amdgpu_device *adev,
188	enum amdgpu_init_lvl_id lvl)
189	{
190	switch (lvl) {
191	case AMDGPU_INIT_LEVEL_MINIMAL_XGMI:
192	adev->init_lvl = &amdgpu_init_minimal_xgmi;
193	break;
194	case AMDGPU_INIT_LEVEL_RESET_RECOVERY:
195	adev->init_lvl = &amdgpu_init_recovery;
196	break;
197	case AMDGPU_INIT_LEVEL_DEFAULT:
198	fallthrough;
199	default:
200	adev->init_lvl = &amdgpu_init_default;
201	break;
202	}
203	}
204
/* Forward declarations; both functions are defined later in this file. */
static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev);
static int amdgpu_device_pm_notifier(struct notifier_block *nb, unsigned long mode,
				     void *data);
208
209	/**
210	* DOC: pcie_replay_count
211	*
212	* The amdgpu driver provides a sysfs API for reporting the total number
213	* of PCIe replays (NAKs).
214	* The file pcie_replay_count is used for this and returns the total
215	* number of replays as a sum of the NAKs generated and NAKs received.
216	*/
217
218	static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
219	struct device_attribute attr, char* *buf)
220	{
221	struct drm_device *ddev = dev_get_drvdata(dev);
222	struct amdgpu_device *adev = drm_to_adev(ddev);
223	uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
224
225	return sysfs_emit(buf, fmt: "%llu\n", cnt);
226	}
227
228	static DEVICE_ATTR(pcie_replay_count, `0444`,
229	amdgpu_device_get_pcie_replay_count, NULL);
230
231	static int amdgpu_device_attr_sysfs_init(struct amdgpu_device *adev)
232	{
233	int ret = `0`;
234
235	if (!amdgpu_sriov_vf(adev))
236	ret = sysfs_create_file(kobj: &adev->dev->kobj,
237	attr: &dev_attr_pcie_replay_count.attr);
238
239	return ret;
240	}
241
242	static void amdgpu_device_attr_sysfs_fini(struct amdgpu_device *adev)
243	{
244	if (!amdgpu_sriov_vf(adev))
245	sysfs_remove_file(kobj: &adev->dev->kobj,
246	attr: &dev_attr_pcie_replay_count.attr);
247	}
248
249	static ssize_t amdgpu_sysfs_reg_state_get(struct file f, struct* kobject *kobj,
250	const struct bin_attribute attr, char* *buf,
251	loff_t ppos, size_t count)
252	{
253	struct device *dev = kobj_to_dev(kobj);
254	struct drm_device *ddev = dev_get_drvdata(dev);
255	struct amdgpu_device *adev = drm_to_adev(ddev);
256	ssize_t bytes_read;
257
258	switch (ppos) {
259	case AMDGPU_SYS_REG_STATE_XGMI:
260	bytes_read = amdgpu_asic_get_reg_state(
261	adev, AMDGPU_REG_STATE_TYPE_XGMI, buf, count);
262	break;
263	case AMDGPU_SYS_REG_STATE_WAFL:
264	bytes_read = amdgpu_asic_get_reg_state(
265	adev, AMDGPU_REG_STATE_TYPE_WAFL, buf, count);
266	break;
267	case AMDGPU_SYS_REG_STATE_PCIE:
268	bytes_read = amdgpu_asic_get_reg_state(
269	adev, AMDGPU_REG_STATE_TYPE_PCIE, buf, count);
270	break;
271	case AMDGPU_SYS_REG_STATE_USR:
272	bytes_read = amdgpu_asic_get_reg_state(
273	adev, AMDGPU_REG_STATE_TYPE_USR, buf, count);
274	break;
275	case AMDGPU_SYS_REG_STATE_USR_1:
276	bytes_read = amdgpu_asic_get_reg_state(
277	adev, AMDGPU_REG_STATE_TYPE_USR_1, buf, count);
278	break;
279	default:
280	return -EINVAL;
281	}
282
283	return bytes_read;
284	}
285
286	static const BIN_ATTR(reg_state, `0444`, amdgpu_sysfs_reg_state_get, NULL,
287	AMDGPU_SYS_REG_STATE_END);
288
289	int amdgpu_reg_state_sysfs_init(struct amdgpu_device *adev)
290	{
291	int ret;
292
293	if (!amdgpu_asic_get_reg_state_supported(adev))
294	return `0`;
295
296	ret = sysfs_create_bin_file(kobj: &adev->dev->kobj, attr: &bin_attr_reg_state);
297
298	return ret;
299	}
300
301	void amdgpu_reg_state_sysfs_fini(struct amdgpu_device *adev)
302	{
303	if (!amdgpu_asic_get_reg_state_supported(adev))
304	return;
305	sysfs_remove_bin_file(kobj: &adev->dev->kobj, attr: &bin_attr_reg_state);
306	}
307
308	int amdgpu_ip_block_suspend(struct amdgpu_ip_block *ip_block)
309	{
310	int r;
311
312	if (ip_block->version->funcs->suspend) {
313	r = ip_block->version->funcs->suspend(ip_block);
314	if (r) {
315	dev_err(ip_block->adev->dev,
316	"suspend of IP block <%s> failed %d\n",
317	ip_block->version->funcs->name, r);
318	return r;
319	}
320	}
321
322	ip_block->status.hw = false;
323	return `0`;
324	}
325
326	int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block)
327	{
328	int r;
329
330	if (ip_block->version->funcs->resume) {
331	r = ip_block->version->funcs->resume(ip_block);
332	if (r) {
333	dev_err(ip_block->adev->dev,
334	"resume of IP block <%s> failed %d\n",
335	ip_block->version->funcs->name, r);
336	return r;
337	}
338	}
339
340	ip_block->status.hw = true;
341	return `0`;
342	}
343
344	/**
345	* DOC: board_info
346	*
347	* The amdgpu driver provides a sysfs API for giving board related information.
348	* It provides the form factor information in the format
349	*
350	* type : form factor
351	*
352	* Possible form factor values
353	*
354	* - "cem" - PCIE CEM card
355	* - "oam" - Open Compute Accelerator Module
356	* - "unknown" - Not known
357	*
358	*/
359
360	static ssize_t amdgpu_device_get_board_info(struct device *dev,
361	struct device_attribute *attr,
362	char *buf)
363	{
364	struct drm_device *ddev = dev_get_drvdata(dev);
365	struct amdgpu_device *adev = drm_to_adev(ddev);
366	enum amdgpu_pkg_type pkg_type = AMDGPU_PKG_TYPE_CEM;
367	const char *pkg;
368
369	if (adev->smuio.funcs && adev->smuio.funcs->get_pkg_type)
370	pkg_type = adev->smuio.funcs->get_pkg_type(adev);
371
372	switch (pkg_type) {
373	case AMDGPU_PKG_TYPE_CEM:
374	pkg = "cem";
375	break;
376	case AMDGPU_PKG_TYPE_OAM:
377	pkg = "oam";
378	break;
379	default:
380	pkg = "unknown";
381	break;
382	}
383
384	return sysfs_emit(buf, fmt: "%s : %s\n", "type", pkg);
385	}
386
387	static DEVICE_ATTR(board_info, `0444`, amdgpu_device_get_board_info, NULL);
388
389	static struct attribute *amdgpu_board_attrs[] = {
390	&dev_attr_board_info.attr,
391	NULL,
392	};
393
394	static umode_t amdgpu_board_attrs_is_visible(struct kobject *kobj,
395	struct attribute attr, int* n)
396	{
397	struct device *dev = kobj_to_dev(kobj);
398	struct drm_device *ddev = dev_get_drvdata(dev);
399	struct amdgpu_device *adev = drm_to_adev(ddev);
400
401	if (adev->flags & AMD_IS_APU)
402	return `0`;
403
404	return attr->mode;
405	}
406
407	static const struct attribute_group amdgpu_board_attrs_group = {
408	.attrs = amdgpu_board_attrs,
409	.is_visible = amdgpu_board_attrs_is_visible
410	};
411
412	static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
413
414
415	/**
416	* amdgpu_device_supports_px - Is the device a dGPU with ATPX power control
417	*
418	* @dev: drm_device pointer
419	*
420	* Returns true if the device is a dGPU with ATPX power control,
421	* otherwise return false.
422	*/
423	bool amdgpu_device_supports_px(struct drm_device *dev)
424	{
425	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
426
427	if ((adev->flags & AMD_IS_PX) && !amdgpu_is_atpx_hybrid())
428	return true;
429	return false;
430	}
431
432	/**
433	* amdgpu_device_supports_boco - Is the device a dGPU with ACPI power resources
434	*
435	* @dev: drm_device pointer
436	*
437	* Returns true if the device is a dGPU with ACPI power control,
438	* otherwise return false.
439	*/
440	bool amdgpu_device_supports_boco(struct drm_device *dev)
441	{
442	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
443
444	if (!IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE))
445	return false;
446
447	if (adev->has_pr3 \|\|
448	((adev->flags & AMD_IS_PX) && amdgpu_is_atpx_hybrid()))
449	return true;
450	return false;
451	}
452
/**
 * amdgpu_device_supports_baco - Does the device support BACO
 *
 * @dev: drm_device pointer
 *
 * Return:
 * 1 if the device supports BACO;
 * 3 if the device supports MACO (only works if BACO is supported)
 * otherwise return 0.
 */
int amdgpu_device_supports_baco(struct drm_device *dev)
{
	struct amdgpu_device *adev = drm_to_adev(dev);

	return amdgpu_asic_supports_baco(adev);
}
469
470	void amdgpu_device_detect_runtime_pm_mode(struct amdgpu_device *adev)
471	{
472	struct drm_device *dev;
473	int bamaco_support;
474
475	dev = adev_to_drm(adev);
476
477	adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
478	bamaco_support = amdgpu_device_supports_baco(dev);
479
480	switch (amdgpu_runtime_pm) {
481	case `2`:
482	if (bamaco_support & MACO_SUPPORT) {
483	adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
484	dev_info(adev->dev, "Forcing BAMACO for runtime pm\n");
485	} else if (bamaco_support == BACO_SUPPORT) {
486	adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
487	dev_info(adev->dev, "Requested mode BAMACO not available,fallback to use BACO\n");
488	}
489	break;
490	case `1`:
491	if (bamaco_support & BACO_SUPPORT) {
492	adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
493	dev_info(adev->dev, "Forcing BACO for runtime pm\n");
494	}
495	break;
496	case -`1`:
497	case -`2`:
498	if (amdgpu_device_supports_px(dev)) { / enable PX as runtime mode /
499	adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
500	dev_info(adev->dev, "Using ATPX for runtime pm\n");
501	} else if (amdgpu_device_supports_boco(dev)) { / enable boco as runtime mode /
502	adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
503	dev_info(adev->dev, "Using BOCO for runtime pm\n");
504	} else {
505	if (!bamaco_support)
506	goto no_runtime_pm;
507
508	switch (adev->asic_type) {
509	case CHIP_VEGA20:
510	case CHIP_ARCTURUS:
511	/ BACO are not supported on vega20 and arctrus /
512	break;
513	case CHIP_VEGA10:
514	/ enable BACO as runpm mode if noretry=0 /
515	if (!adev->gmc.noretry && !amdgpu_passthrough(adev))
516	adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
517	break;
518	default:
519	/ enable BACO as runpm mode on CI+ /
520	if (!amdgpu_passthrough(adev))
521	adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
522	break;
523	}
524
525	if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO) {
526	if (bamaco_support & MACO_SUPPORT) {
527	adev->pm.rpm_mode = AMDGPU_RUNPM_BAMACO;
528	dev_info(adev->dev, "Using BAMACO for runtime pm\n");
529	} else {
530	dev_info(adev->dev, "Using BACO for runtime pm\n");
531	}
532	}
533	}
534	break;
535	case `0`:
536	dev_info(adev->dev, "runtime pm is manually disabled\n");
537	break;
538	default:
539	break;
540	}
541
542	no_runtime_pm:
543	if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
544	dev_info(adev->dev, "Runtime PM not available\n");
545	}
546	/**
547	* amdgpu_device_supports_smart_shift - Is the device dGPU with
548	* smart shift support
549	*
550	* @dev: drm_device pointer
551	*
552	* Returns true if the device is a dGPU with Smart Shift support,
553	* otherwise returns false.
554	*/
555	bool amdgpu_device_supports_smart_shift(struct drm_device *dev)
556	{
557	return (amdgpu_device_supports_boco(dev) &&
558	amdgpu_acpi_is_power_shift_control_supported());
559	}
560
561	/*
562	* VRAM access helper functions
563	*/
564
565	/**
566	* amdgpu_device_mm_access - access vram by MM_INDEX/MM_DATA
567	*
568	* @adev: amdgpu_device pointer
569	* @pos: offset of the buffer in vram
570	* @buf: virtual address of the buffer in system memory
571	* @size: read/write size, sizeof(@buf) must > @size
572	* @write: true - write to vram, otherwise - read from vram
573	*/
574	void amdgpu_device_mm_access(struct amdgpu_device *adev, loff_t pos,
575	void *buf, size_t size, bool write)
576	{
577	unsigned long flags;
578	uint32_t hi = ~`0`, tmp = `0`;
579	uint32_t *data = buf;
580	uint64_t last;
581	int idx;
582
583	if (!drm_dev_enter(dev: adev_to_drm(adev), idx: &idx))
584	return;
585
586	BUG_ON(!IS_ALIGNED(pos, `4`) \|\| !IS_ALIGNED(size, `4`));
587
588	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
589	for (last = pos + size; pos < last; pos += `4`) {
590	tmp = pos >> `31`;
591
592	WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) \| `0x80000000`);
593	if (tmp != hi) {
594	WREG32_NO_KIQ(mmMM_INDEX_HI, tmp);
595	hi = tmp;
596	}
597	if (write)
598	WREG32_NO_KIQ(mmMM_DATA, *data++);
599	else
600	*data++ = RREG32_NO_KIQ(mmMM_DATA);
601	}
602
603	spin_unlock_irqrestore(lock: &adev->mmio_idx_lock, flags);
604	drm_dev_exit(idx);
605	}
606
607	/**
608	* amdgpu_device_aper_access - access vram by vram aperture
609	*
610	* @adev: amdgpu_device pointer
611	* @pos: offset of the buffer in vram
612	* @buf: virtual address of the buffer in system memory
613	* @size: read/write size, sizeof(@buf) must > @size
614	* @write: true - write to vram, otherwise - read from vram
615	*
616	* The return value means how many bytes have been transferred.
617	*/
618	size_t amdgpu_device_aper_access(struct amdgpu_device *adev, loff_t pos,
619	void *buf, size_t size, bool write)
620	{
621	#ifdef CONFIG_64BIT
622	void __iomem *addr;
623	size_t count = `0`;
624	uint64_t last;
625
626	if (!adev->mman.aper_base_kaddr)
627	return `0`;
628
629	last = min(pos + size, adev->gmc.visible_vram_size);
630	if (last > pos) {
631	addr = adev->mman.aper_base_kaddr + pos;
632	count = last - pos;
633
634	if (write) {
635	memcpy_toio(addr, buf, count);
636	/ Make sure HDP write cache flush happens without any reordering*
637	* after the system memory contents are sent over PCIe device
638	*/
639	mb();
640	amdgpu_device_flush_hdp(adev, NULL);
641	} else {
642	amdgpu_device_invalidate_hdp(adev, NULL);
643	/ Make sure HDP read cache is invalidated before issuing a read*
644	* to the PCIe device
645	*/
646	mb();
647	memcpy_fromio(buf, addr, count);
648	}
649
650	}
651
652	return count;
653	#else
654	return `0`;
655	#endif
656	}
657
658	/**
659	* amdgpu_device_vram_access - read/write a buffer in vram
660	*
661	* @adev: amdgpu_device pointer
662	* @pos: offset of the buffer in vram
663	* @buf: virtual address of the buffer in system memory
664	* @size: read/write size, sizeof(@buf) must > @size
665	* @write: true - write to vram, otherwise - read from vram
666	*/
667	void amdgpu_device_vram_access(struct amdgpu_device *adev, loff_t pos,
668	void *buf, size_t size, bool write)
669	{
670	size_t count;
671
672	/ try to using vram apreature to access vram first /
673	count = amdgpu_device_aper_access(adev, pos, buf, size, write);
674	size -= count;
675	if (size) {
676	/ using MM to access rest vram /
677	pos += count;
678	buf += count;
679	amdgpu_device_mm_access(adev, pos, buf, size, write);
680	}
681	}
682
683	/*
684	* register access helper functions.
685	*/
686
687	/ Check if hw access should be skipped because of hotplug or device error /
688	bool amdgpu_device_skip_hw_access(struct amdgpu_device *adev)
689	{
690	if (adev->no_hw_access)
691	return true;
692
693	#ifdef CONFIG_LOCKDEP
694	/*
695	* This is a bit complicated to understand, so worth a comment. What we assert
696	* here is that the GPU reset is not running on another thread in parallel.
697	*
698	* For this we trylock the read side of the reset semaphore, if that succeeds
699	* we know that the reset is not running in parallel.
700	*
701	* If the trylock fails we assert that we are either already holding the read
702	* side of the lock or are the reset thread itself and hold the write side of
703	* the lock.
704	*/
705	if (in_task()) {
706	if (down_read_trylock(sem: &adev->reset_domain->sem))
707	up_read(sem: &adev->reset_domain->sem);
708	else
709	lockdep_assert_held(&adev->reset_domain->sem);
710	}
711	#endif
712	return false;
713	}
714
715	/**
716	* amdgpu_device_rreg - read a memory mapped IO or indirect register
717	*
718	* @adev: amdgpu_device pointer
719	* @reg: dword aligned register offset
720	* @acc_flags: access flags which require special behavior
721	*
722	* Returns the 32 bit value from the offset specified.
723	*/
724	uint32_t amdgpu_device_rreg(struct amdgpu_device *adev,
725	uint32_t reg, uint32_t acc_flags)
726	{
727	uint32_t ret;
728
729	if (amdgpu_device_skip_hw_access(adev))
730	return `0`;
731
732	if ((reg * `4`) < adev->rmmio_size) {
733	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
734	amdgpu_sriov_runtime(adev) &&
735	down_read_trylock(sem: &adev->reset_domain->sem)) {
736	ret = amdgpu_kiq_rreg(adev, reg, xcc_id: `0`);
737	up_read(sem: &adev->reset_domain->sem);
738	} else {
739	ret = readl(addr: ((void __iomem )adev->rmmio) + (reg `4`));
740	}
741	} else {
742	ret = adev->pcie_rreg(adev, reg * `4`);
743	}
744
745	trace_amdgpu_device_rreg(did: adev->pdev->device, reg, value: ret);
746
747	return ret;
748	}
749
750	/*
751	* MMIO register read with bytes helper functions
752	* @offset:bytes offset from MMIO start
753	*/
754
755	/**
756	* amdgpu_mm_rreg8 - read a memory mapped IO register
757	*
758	* @adev: amdgpu_device pointer
759	* @offset: byte aligned register offset
760	*
761	* Returns the 8 bit value from the offset specified.
762	*/
763	uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)
764	{
765	if (amdgpu_device_skip_hw_access(adev))
766	return `0`;
767
768	if (offset < adev->rmmio_size)
769	return (readb(addr: adev->rmmio + offset));
770	BUG();
771	}
772
773
774	/**
775	* amdgpu_device_xcc_rreg - read a memory mapped IO or indirect register with specific XCC
776	*
777	* @adev: amdgpu_device pointer
778	* @reg: dword aligned register offset
779	* @acc_flags: access flags which require special behavior
780	* @xcc_id: xcc accelerated compute core id
781	*
782	* Returns the 32 bit value from the offset specified.
783	*/
784	uint32_t amdgpu_device_xcc_rreg(struct amdgpu_device *adev,
785	uint32_t reg, uint32_t acc_flags,
786	uint32_t xcc_id)
787	{
788	uint32_t ret, rlcg_flag;
789
790	if (amdgpu_device_skip_hw_access(adev))
791	return `0`;
792
793	if ((reg * `4`) < adev->rmmio_size) {
794	if (amdgpu_sriov_vf(adev) &&
795	!amdgpu_sriov_runtime(adev) &&
796	adev->gfx.rlc.rlcg_reg_access_supported &&
797	amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
798	hwip: GC_HWIP, write: false,
799	rlcg_flag: &rlcg_flag)) {
800	ret = amdgpu_virt_rlcg_reg_rw(adev, offset: reg, v: `0`, flag: rlcg_flag, GET_INST(GC, xcc_id));
801	} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
802	amdgpu_sriov_runtime(adev) &&
803	down_read_trylock(sem: &adev->reset_domain->sem)) {
804	ret = amdgpu_kiq_rreg(adev, reg, xcc_id);
805	up_read(sem: &adev->reset_domain->sem);
806	} else {
807	ret = readl(addr: ((void __iomem )adev->rmmio) + (reg `4`));
808	}
809	} else {
810	ret = adev->pcie_rreg(adev, reg * `4`);
811	}
812
813	return ret;
814	}
815
816	/*
817	* MMIO register write with bytes helper functions
818	* @offset:bytes offset from MMIO start
819	* @value: the value want to be written to the register
820	*/
821
822	/**
823	* amdgpu_mm_wreg8 - read a memory mapped IO register
824	*
825	* @adev: amdgpu_device pointer
826	* @offset: byte aligned register offset
827	* @value: 8 bit value to write
828	*
829	* Writes the value specified to the offset specified.
830	*/
831	void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)
832	{
833	if (amdgpu_device_skip_hw_access(adev))
834	return;
835
836	if (offset < adev->rmmio_size)
837	writeb(val: value, addr: adev->rmmio + offset);
838	else
839	BUG();
840	}
841
842	/**
843	* amdgpu_device_wreg - write to a memory mapped IO or indirect register
844	*
845	* @adev: amdgpu_device pointer
846	* @reg: dword aligned register offset
847	* @v: 32 bit value to write to the register
848	* @acc_flags: access flags which require special behavior
849	*
850	* Writes the value specified to the offset specified.
851	*/
852	void amdgpu_device_wreg(struct amdgpu_device *adev,
853	uint32_t reg, uint32_t v,
854	uint32_t acc_flags)
855	{
856	if (amdgpu_device_skip_hw_access(adev))
857	return;
858
859	if ((reg * `4`) < adev->rmmio_size) {
860	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
861	amdgpu_sriov_runtime(adev) &&
862	down_read_trylock(sem: &adev->reset_domain->sem)) {
863	amdgpu_kiq_wreg(adev, reg, v, xcc_id: `0`);
864	up_read(sem: &adev->reset_domain->sem);
865	} else {
866	writel(val: v, addr: ((void __iomem )adev->rmmio) + (reg `4`));
867	}
868	} else {
869	adev->pcie_wreg(adev, reg * `4`, v);
870	}
871
872	trace_amdgpu_device_wreg(did: adev->pdev->device, reg, value: v);
873	}
874
875	/**
876	* amdgpu_mm_wreg_mmio_rlc - write register either with direct/indirect mmio or with RLC path if in range
877	*
878	* @adev: amdgpu_device pointer
879	* @reg: mmio/rlc register
880	* @v: value to write
881	* @xcc_id: xcc accelerated compute core id
882	*
883	* this function is invoked only for the debugfs register access
884	*/
885	void amdgpu_mm_wreg_mmio_rlc(struct amdgpu_device *adev,
886	uint32_t reg, uint32_t v,
887	uint32_t xcc_id)
888	{
889	if (amdgpu_device_skip_hw_access(adev))
890	return;
891
892	if (amdgpu_sriov_fullaccess(adev) &&
893	adev->gfx.rlc.funcs &&
894	adev->gfx.rlc.funcs->is_rlcg_access_range) {
895	if (adev->gfx.rlc.funcs->is_rlcg_access_range(adev, reg))
896	return amdgpu_sriov_wreg(adev, offset: reg, value: v, acc_flags: `0`, hwip: `0`, xcc_id);
897	} else if ((reg * `4`) >= adev->rmmio_size) {
898	adev->pcie_wreg(adev, reg * `4`, v);
899	} else {
900	writel(val: v, addr: ((void __iomem )adev->rmmio) + (reg `4`));
901	}
902	}
903
904	/**
905	* amdgpu_device_xcc_wreg - write to a memory mapped IO or indirect register with specific XCC
906	*
907	* @adev: amdgpu_device pointer
908	* @reg: dword aligned register offset
909	* @v: 32 bit value to write to the register
910	* @acc_flags: access flags which require special behavior
911	* @xcc_id: xcc accelerated compute core id
912	*
913	* Writes the value specified to the offset specified.
914	*/
915	void amdgpu_device_xcc_wreg(struct amdgpu_device *adev,
916	uint32_t reg, uint32_t v,
917	uint32_t acc_flags, uint32_t xcc_id)
918	{
919	uint32_t rlcg_flag;
920
921	if (amdgpu_device_skip_hw_access(adev))
922	return;
923
924	if ((reg * `4`) < adev->rmmio_size) {
925	if (amdgpu_sriov_vf(adev) &&
926	!amdgpu_sriov_runtime(adev) &&
927	adev->gfx.rlc.rlcg_reg_access_supported &&
928	amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags,
929	hwip: GC_HWIP, write: true,
930	rlcg_flag: &rlcg_flag)) {
931	amdgpu_virt_rlcg_reg_rw(adev, offset: reg, v, flag: rlcg_flag, GET_INST(GC, xcc_id));
932	} else if (!(acc_flags & AMDGPU_REGS_NO_KIQ) &&
933	amdgpu_sriov_runtime(adev) &&
934	down_read_trylock(sem: &adev->reset_domain->sem)) {
935	amdgpu_kiq_wreg(adev, reg, v, xcc_id);
936	up_read(sem: &adev->reset_domain->sem);
937	} else {
938	writel(val: v, addr: ((void __iomem )adev->rmmio) + (reg `4`));
939	}
940	} else {
941	adev->pcie_wreg(adev, reg * `4`, v);
942	}
943	}
944
945	/**
946	* amdgpu_device_indirect_rreg - read an indirect register
947	*
948	* @adev: amdgpu_device pointer
949	* @reg_addr: indirect register address to read from
950	*
951	* Returns the value of indirect register @reg_addr
952	*/
953	u32 amdgpu_device_indirect_rreg(struct amdgpu_device *adev,
954	u32 reg_addr)
955	{
956	unsigned long flags, pcie_index, pcie_data;
957	void __iomem *pcie_index_offset;
958	void __iomem *pcie_data_offset;
959	u32 r;
960
961	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
962	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
963
964	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
965	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
966	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
967
968	writel(val: reg_addr, addr: pcie_index_offset);
969	readl(addr: pcie_index_offset);
970	r = readl(addr: pcie_data_offset);
971	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
972
973	return r;
974	}
975
976	u32 amdgpu_device_indirect_rreg_ext(struct amdgpu_device *adev,
977	u64 reg_addr)
978	{
979	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
980	u32 r;
981	void __iomem *pcie_index_offset;
982	void __iomem *pcie_index_hi_offset;
983	void __iomem *pcie_data_offset;
984
985	if (unlikely(!adev->nbio.funcs)) {
986	pcie_index = AMDGPU_PCIE_INDEX_FALLBACK;
987	pcie_data = AMDGPU_PCIE_DATA_FALLBACK;
988	} else {
989	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
990	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
991	}
992
993	if (reg_addr >> `32`) {
994	if (unlikely(!adev->nbio.funcs))
995	pcie_index_hi = AMDGPU_PCIE_INDEX_HI_FALLBACK;
996	else
997	pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
998	} else {
999	pcie_index_hi = `0`;
1000	}
1001
1002	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1003	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1004	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1005	if (pcie_index_hi != `0`)
1006	pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1007	pcie_index_hi * `4`;
1008
1009	writel(val: reg_addr, addr: pcie_index_offset);
1010	readl(addr: pcie_index_offset);
1011	if (pcie_index_hi != `0`) {
1012	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1013	readl(addr: pcie_index_hi_offset);
1014	}
1015	r = readl(addr: pcie_data_offset);
1016
1017	/ clear the high bits /
1018	if (pcie_index_hi != `0`) {
1019	writel(val: `0`, addr: pcie_index_hi_offset);
1020	readl(addr: pcie_index_hi_offset);
1021	}
1022
1023	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1024
1025	return r;
1026	}
1027
1028	/**
1029	* amdgpu_device_indirect_rreg64 - read a 64bits indirect register
1030	*
1031	* @adev: amdgpu_device pointer
1032	* @reg_addr: indirect register address to read from
1033	*
1034	* Returns the value of indirect register @reg_addr
1035	*/
1036	u64 amdgpu_device_indirect_rreg64(struct amdgpu_device *adev,
1037	u32 reg_addr)
1038	{
1039	unsigned long flags, pcie_index, pcie_data;
1040	void __iomem *pcie_index_offset;
1041	void __iomem *pcie_data_offset;
1042	u64 r;
1043
1044	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1045	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1046
1047	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1048	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1049	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1050
1051	/ read low 32 bits /
1052	writel(val: reg_addr, addr: pcie_index_offset);
1053	readl(addr: pcie_index_offset);
1054	r = readl(addr: pcie_data_offset);
1055	/ read high 32 bits /
1056	writel(val: reg_addr + `4`, addr: pcie_index_offset);
1057	readl(addr: pcie_index_offset);
1058	r \|= ((u64)readl(addr: pcie_data_offset) << `32`);
1059	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1060
1061	return r;
1062	}
1063
1064	u64 amdgpu_device_indirect_rreg64_ext(struct amdgpu_device *adev,
1065	u64 reg_addr)
1066	{
1067	unsigned long flags, pcie_index, pcie_data;
1068	unsigned long pcie_index_hi = `0`;
1069	void __iomem *pcie_index_offset;
1070	void __iomem *pcie_index_hi_offset;
1071	void __iomem *pcie_data_offset;
1072	u64 r;
1073
1074	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1075	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1076	if ((reg_addr >> `32`) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1077	pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1078
1079	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1080	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1081	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1082	if (pcie_index_hi != `0`)
1083	pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1084	pcie_index_hi * `4`;
1085
1086	/ read low 32 bits /
1087	writel(val: reg_addr, addr: pcie_index_offset);
1088	readl(addr: pcie_index_offset);
1089	if (pcie_index_hi != `0`) {
1090	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1091	readl(addr: pcie_index_hi_offset);
1092	}
1093	r = readl(addr: pcie_data_offset);
1094	/ read high 32 bits /
1095	writel(val: reg_addr + `4`, addr: pcie_index_offset);
1096	readl(addr: pcie_index_offset);
1097	if (pcie_index_hi != `0`) {
1098	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1099	readl(addr: pcie_index_hi_offset);
1100	}
1101	r \|= ((u64)readl(addr: pcie_data_offset) << `32`);
1102
1103	/ clear the high bits /
1104	if (pcie_index_hi != `0`) {
1105	writel(val: `0`, addr: pcie_index_hi_offset);
1106	readl(addr: pcie_index_hi_offset);
1107	}
1108
1109	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1110
1111	return r;
1112	}
1113
1114	/**
1115	* amdgpu_device_indirect_wreg - write an indirect register address
1116	*
1117	* @adev: amdgpu_device pointer
1118	* @reg_addr: indirect register offset
1119	* @reg_data: indirect register data
1120	*
1121	*/
1122	void amdgpu_device_indirect_wreg(struct amdgpu_device *adev,
1123	u32 reg_addr, u32 reg_data)
1124	{
1125	unsigned long flags, pcie_index, pcie_data;
1126	void __iomem *pcie_index_offset;
1127	void __iomem *pcie_data_offset;
1128
1129	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1130	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1131
1132	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1133	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1134	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1135
1136	writel(val: reg_addr, addr: pcie_index_offset);
1137	readl(addr: pcie_index_offset);
1138	writel(val: reg_data, addr: pcie_data_offset);
1139	readl(addr: pcie_data_offset);
1140	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1141	}
1142
1143	void amdgpu_device_indirect_wreg_ext(struct amdgpu_device *adev,
1144	u64 reg_addr, u32 reg_data)
1145	{
1146	unsigned long flags, pcie_index, pcie_index_hi, pcie_data;
1147	void __iomem *pcie_index_offset;
1148	void __iomem *pcie_index_hi_offset;
1149	void __iomem *pcie_data_offset;
1150
1151	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1152	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1153	if ((reg_addr >> `32`) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1154	pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1155	else
1156	pcie_index_hi = `0`;
1157
1158	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1159	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1160	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1161	if (pcie_index_hi != `0`)
1162	pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1163	pcie_index_hi * `4`;
1164
1165	writel(val: reg_addr, addr: pcie_index_offset);
1166	readl(addr: pcie_index_offset);
1167	if (pcie_index_hi != `0`) {
1168	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1169	readl(addr: pcie_index_hi_offset);
1170	}
1171	writel(val: reg_data, addr: pcie_data_offset);
1172	readl(addr: pcie_data_offset);
1173
1174	/ clear the high bits /
1175	if (pcie_index_hi != `0`) {
1176	writel(val: `0`, addr: pcie_index_hi_offset);
1177	readl(addr: pcie_index_hi_offset);
1178	}
1179
1180	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1181	}
1182
1183	/**
1184	* amdgpu_device_indirect_wreg64 - write a 64bits indirect register address
1185	*
1186	* @adev: amdgpu_device pointer
1187	* @reg_addr: indirect register offset
1188	* @reg_data: indirect register data
1189	*
1190	*/
1191	void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
1192	u32 reg_addr, u64 reg_data)
1193	{
1194	unsigned long flags, pcie_index, pcie_data;
1195	void __iomem *pcie_index_offset;
1196	void __iomem *pcie_data_offset;
1197
1198	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1199	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1200
1201	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1202	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1203	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1204
1205	/ write low 32 bits /
1206	writel(val: reg_addr, addr: pcie_index_offset);
1207	readl(addr: pcie_index_offset);
1208	writel(val: (u32)(reg_data & `0xffffffffULL`), addr: pcie_data_offset);
1209	readl(addr: pcie_data_offset);
1210	/ write high 32 bits /
1211	writel(val: reg_addr + `4`, addr: pcie_index_offset);
1212	readl(addr: pcie_index_offset);
1213	writel(val: (u32)(reg_data >> `32`), addr: pcie_data_offset);
1214	readl(addr: pcie_data_offset);
1215	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1216	}
1217
1218	void amdgpu_device_indirect_wreg64_ext(struct amdgpu_device *adev,
1219	u64 reg_addr, u64 reg_data)
1220	{
1221	unsigned long flags, pcie_index, pcie_data;
1222	unsigned long pcie_index_hi = `0`;
1223	void __iomem *pcie_index_offset;
1224	void __iomem *pcie_index_hi_offset;
1225	void __iomem *pcie_data_offset;
1226
1227	pcie_index = adev->nbio.funcs->get_pcie_index_offset(adev);
1228	pcie_data = adev->nbio.funcs->get_pcie_data_offset(adev);
1229	if ((reg_addr >> `32`) && (adev->nbio.funcs->get_pcie_index_hi_offset))
1230	pcie_index_hi = adev->nbio.funcs->get_pcie_index_hi_offset(adev);
1231
1232	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
1233	pcie_index_offset = (void __iomem )adev->rmmio + pcie_index `4`;
1234	pcie_data_offset = (void __iomem )adev->rmmio + pcie_data `4`;
1235	if (pcie_index_hi != `0`)
1236	pcie_index_hi_offset = (void __iomem *)adev->rmmio +
1237	pcie_index_hi * `4`;
1238
1239	/ write low 32 bits /
1240	writel(val: reg_addr, addr: pcie_index_offset);
1241	readl(addr: pcie_index_offset);
1242	if (pcie_index_hi != `0`) {
1243	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1244	readl(addr: pcie_index_hi_offset);
1245	}
1246	writel(val: (u32)(reg_data & `0xffffffffULL`), addr: pcie_data_offset);
1247	readl(addr: pcie_data_offset);
1248	/ write high 32 bits /
1249	writel(val: reg_addr + `4`, addr: pcie_index_offset);
1250	readl(addr: pcie_index_offset);
1251	if (pcie_index_hi != `0`) {
1252	writel(val: (reg_addr >> `32`) & `0xff`, addr: pcie_index_hi_offset);
1253	readl(addr: pcie_index_hi_offset);
1254	}
1255	writel(val: (u32)(reg_data >> `32`), addr: pcie_data_offset);
1256	readl(addr: pcie_data_offset);
1257
1258	/ clear the high bits /
1259	if (pcie_index_hi != `0`) {
1260	writel(val: `0`, addr: pcie_index_hi_offset);
1261	readl(addr: pcie_index_hi_offset);
1262	}
1263
1264	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
1265	}
1266
1267	/**
1268	* amdgpu_device_get_rev_id - query device rev_id
1269	*
1270	* @adev: amdgpu_device pointer
1271	*
1272	* Return device rev_id
1273	*/
1274	u32 amdgpu_device_get_rev_id(struct amdgpu_device *adev)
1275	{
1276	return adev->nbio.funcs->get_rev_id(adev);
1277	}
1278
1279	/**
1280	* amdgpu_invalid_rreg - dummy reg read function
1281	*
1282	* @adev: amdgpu_device pointer
1283	* @reg: offset of register
1284	*
1285	* Dummy register read function. Used for register blocks
1286	* that certain asics don't have (all asics).
1287	* Returns the value in the register.
1288	*/
1289	static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
1290	{
1291	DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
1292	BUG();
1293	return `0`;
1294	}
1295
1296	static uint32_t amdgpu_invalid_rreg_ext(struct amdgpu_device *adev, uint64_t reg)
1297	{
1298	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1299	BUG();
1300	return `0`;
1301	}
1302
1303	/**
1304	* amdgpu_invalid_wreg - dummy reg write function
1305	*
1306	* @adev: amdgpu_device pointer
1307	* @reg: offset of register
1308	* @v: value to write to the register
1309	*
1310	* Dummy register read function. Used for register blocks
1311	* that certain asics don't have (all asics).
1312	*/
1313	static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
1314	{
1315	DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
1316	reg, v);
1317	BUG();
1318	}
1319
1320	static void amdgpu_invalid_wreg_ext(struct amdgpu_device *adev, uint64_t reg, uint32_t v)
1321	{
1322	DRM_ERROR("Invalid callback to write register 0x%llX with 0x%08X\n",
1323	reg, v);
1324	BUG();
1325	}
1326
1327	/**
1328	* amdgpu_invalid_rreg64 - dummy 64 bit reg read function
1329	*
1330	* @adev: amdgpu_device pointer
1331	* @reg: offset of register
1332	*
1333	* Dummy register read function. Used for register blocks
1334	* that certain asics don't have (all asics).
1335	* Returns the value in the register.
1336	*/
1337	static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
1338	{
1339	DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
1340	BUG();
1341	return `0`;
1342	}
1343
1344	static uint64_t amdgpu_invalid_rreg64_ext(struct amdgpu_device *adev, uint64_t reg)
1345	{
1346	DRM_ERROR("Invalid callback to read register 0x%llX\n", reg);
1347	BUG();
1348	return `0`;
1349	}
1350
1351	/**
1352	* amdgpu_invalid_wreg64 - dummy reg write function
1353	*
1354	* @adev: amdgpu_device pointer
1355	* @reg: offset of register
1356	* @v: value to write to the register
1357	*
1358	* Dummy register read function. Used for register blocks
1359	* that certain asics don't have (all asics).
1360	*/
1361	static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint64_t v)
1362	{
1363	DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
1364	reg, v);
1365	BUG();
1366	}
1367
1368	static void amdgpu_invalid_wreg64_ext(struct amdgpu_device *adev, uint64_t reg, uint64_t v)
1369	{
1370	DRM_ERROR("Invalid callback to write 64 bit register 0x%llX with 0x%08llX\n",
1371	reg, v);
1372	BUG();
1373	}
1374
1375	/**
1376	* amdgpu_block_invalid_rreg - dummy reg read function
1377	*
1378	* @adev: amdgpu_device pointer
1379	* @block: offset of instance
1380	* @reg: offset of register
1381	*
1382	* Dummy register read function. Used for register blocks
1383	* that certain asics don't have (all asics).
1384	* Returns the value in the register.
1385	*/
1386	static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
1387	uint32_t block, uint32_t reg)
1388	{
1389	DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
1390	reg, block);
1391	BUG();
1392	return `0`;
1393	}
1394
1395	/**
1396	* amdgpu_block_invalid_wreg - dummy reg write function
1397	*
1398	* @adev: amdgpu_device pointer
1399	* @block: offset of instance
1400	* @reg: offset of register
1401	* @v: value to write to the register
1402	*
1403	* Dummy register read function. Used for register blocks
1404	* that certain asics don't have (all asics).
1405	*/
1406	static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
1407	uint32_t block,
1408	uint32_t reg, uint32_t v)
1409	{
1410	DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
1411	reg, block, v);
1412	BUG();
1413	}
1414
1415	static uint32_t amdgpu_device_get_vbios_flags(struct amdgpu_device *adev)
1416	{
1417	if (hweight32(adev->aid_mask) && (adev->flags & AMD_IS_APU))
1418	return AMDGPU_VBIOS_SKIP;
1419
1420	if (hweight32(adev->aid_mask) && amdgpu_passthrough(adev))
1421	return AMDGPU_VBIOS_OPTIONAL;
1422
1423	return `0`;
1424	}
1425
1426	/**
1427	* amdgpu_device_asic_init - Wrapper for atom asic_init
1428	*
1429	* @adev: amdgpu_device pointer
1430	*
1431	* Does any asic specific work and then calls atom asic init.
1432	*/
1433	static int amdgpu_device_asic_init(struct amdgpu_device *adev)
1434	{
1435	uint32_t flags;
1436	bool optional;
1437	int ret;
1438
1439	amdgpu_asic_pre_asic_init(adev);
1440	flags = amdgpu_device_get_vbios_flags(adev);
1441	optional = !!(flags & (AMDGPU_VBIOS_OPTIONAL \| AMDGPU_VBIOS_SKIP));
1442
1443	if (amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `3`) \|\|
1444	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `4`) \|\|
1445	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `5`, `0`) \|\|
1446	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) >= IP_VERSION(`11`, `0`, `0`)) {
1447	amdgpu_psp_wait_for_bootloader(adev);
1448	if (optional && !adev->bios)
1449	return `0`;
1450
1451	ret = amdgpu_atomfirmware_asic_init(adev, fb_reset: true);
1452	return ret;
1453	} else {
1454	if (optional && !adev->bios)
1455	return `0`;
1456
1457	return amdgpu_atom_asic_init(ctx: adev->mode_info.atom_context);
1458	}
1459
1460	return `0`;
1461	}
1462
1463	/**
1464	* amdgpu_device_mem_scratch_init - allocate the VRAM scratch page
1465	*
1466	* @adev: amdgpu_device pointer
1467	*
1468	* Allocates a scratch page of VRAM for use by various things in the
1469	* driver.
1470	*/
1471	static int amdgpu_device_mem_scratch_init(struct amdgpu_device *adev)
1472	{
1473	return amdgpu_bo_create_kernel(adev, AMDGPU_GPU_PAGE_SIZE, PAGE_SIZE,
1474	AMDGPU_GEM_DOMAIN_VRAM \|
1475	AMDGPU_GEM_DOMAIN_GTT,
1476	bo_ptr: &adev->mem_scratch.robj,
1477	gpu_addr: &adev->mem_scratch.gpu_addr,
1478	cpu_addr: (void **)&adev->mem_scratch.ptr);
1479	}
1480
1481	/**
1482	* amdgpu_device_mem_scratch_fini - Free the VRAM scratch page
1483	*
1484	* @adev: amdgpu_device pointer
1485	*
1486	* Frees the VRAM scratch page.
1487	*/
1488	static void amdgpu_device_mem_scratch_fini(struct amdgpu_device *adev)
1489	{
1490	amdgpu_bo_free_kernel(bo: &adev->mem_scratch.robj, NULL, NULL);
1491	}
1492
1493	/**
1494	* amdgpu_device_program_register_sequence - program an array of registers.
1495	*
1496	* @adev: amdgpu_device pointer
1497	* @registers: pointer to the register array
1498	* @array_size: size of the register array
1499	*
1500	* Programs an array or registers with and or masks.
1501	* This is a helper for setting golden registers.
1502	*/
1503	void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
1504	const u32 *registers,
1505	const u32 array_size)
1506	{
1507	u32 tmp, reg, and_mask, or_mask;
1508	int i;
1509
1510	if (array_size % `3`)
1511	return;
1512
1513	for (i = `0`; i < array_size; i += `3`) {
1514	reg = registers[i + `0`];
1515	and_mask = registers[i + `1`];
1516	or_mask = registers[i + `2`];
1517
1518	if (and_mask == `0xffffffff`) {
1519	tmp = or_mask;
1520	} else {
1521	tmp = RREG32(reg);
1522	tmp &= ~and_mask;
1523	if (adev->family >= AMDGPU_FAMILY_AI)
1524	tmp \|= (or_mask & and_mask);
1525	else
1526	tmp \|= or_mask;
1527	}
1528	WREG32(reg, tmp);
1529	}
1530	}
1531
1532	/**
1533	* amdgpu_device_pci_config_reset - reset the GPU
1534	*
1535	* @adev: amdgpu_device pointer
1536	*
1537	* Resets the GPU using the pci config reset sequence.
1538	* Only applicable to asics prior to vega10.
1539	*/
1540	void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
1541	{
1542	pci_write_config_dword(dev: adev->pdev, where: `0x7c`, AMDGPU_ASIC_RESET_DATA);
1543	}
1544
1545	/**
1546	* amdgpu_device_pci_reset - reset the GPU using generic PCI means
1547	*
1548	* @adev: amdgpu_device pointer
1549	*
1550	* Resets the GPU using generic pci reset interfaces (FLR, SBR, etc.).
1551	*/
1552	int amdgpu_device_pci_reset(struct amdgpu_device *adev)
1553	{
1554	return pci_reset_function(dev: adev->pdev);
1555	}
1556
1557	/*
1558	* amdgpu_device_wb_*()
1559	* Writeback is the method by which the GPU updates special pages in memory
1560	* with the status of certain GPU events (fences, ring pointers,etc.).
1561	*/
1562
1563	/**
1564	* amdgpu_device_wb_fini - Disable Writeback and free memory
1565	*
1566	* @adev: amdgpu_device pointer
1567	*
1568	* Disables Writeback and frees the Writeback memory (all asics).
1569	* Used at driver shutdown.
1570	*/
1571	static void amdgpu_device_wb_fini(struct amdgpu_device *adev)
1572	{
1573	if (adev->wb.wb_obj) {
1574	amdgpu_bo_free_kernel(bo: &adev->wb.wb_obj,
1575	gpu_addr: &adev->wb.gpu_addr,
1576	cpu_addr: (void **)&adev->wb.wb);
1577	adev->wb.wb_obj = NULL;
1578	}
1579	}
1580
1581	/**
1582	* amdgpu_device_wb_init - Init Writeback driver info and allocate memory
1583	*
1584	* @adev: amdgpu_device pointer
1585	*
1586	* Initializes writeback and allocates writeback memory (all asics).
1587	* Used at driver startup.
1588	* Returns 0 on success or an -error on failure.
1589	*/
1590	static int amdgpu_device_wb_init(struct amdgpu_device *adev)
1591	{
1592	int r;
1593
1594	if (adev->wb.wb_obj == NULL) {
1595	/ AMDGPU_MAX_WB * sizeof(uint32_t) * 8 = AMDGPU_MAX_WB 256bit slots /
1596	r = amdgpu_bo_create_kernel(adev, AMDGPU_MAX_WB * sizeof(uint32_t) * `8`,
1597	PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT,
1598	bo_ptr: &adev->wb.wb_obj, gpu_addr: &adev->wb.gpu_addr,
1599	cpu_addr: (void **)&adev->wb.wb);
1600	if (r) {
1601	dev_warn(adev->dev, "(%d) create WB bo failed\n", r);
1602	return r;
1603	}
1604
1605	adev->wb.num_wb = AMDGPU_MAX_WB;
1606	memset(&adev->wb.used, `0`, sizeof(adev->wb.used));
1607
1608	/ clear wb memory /
1609	memset((char )adev->wb.wb, `0`, AMDGPU_MAX_WB sizeof(uint32_t) * `8`);
1610	}
1611
1612	return `0`;
1613	}
1614
1615	/**
1616	* amdgpu_device_wb_get - Allocate a wb entry
1617	*
1618	* @adev: amdgpu_device pointer
1619	* @wb: wb index
1620	*
1621	* Allocate a wb slot for use by the driver (all asics).
1622	* Returns 0 on success or -EINVAL on failure.
1623	*/
1624	int amdgpu_device_wb_get(struct amdgpu_device adev, u32 wb)
1625	{
1626	unsigned long flags, offset;
1627
1628	spin_lock_irqsave(&adev->wb.lock, flags);
1629	offset = find_first_zero_bit(addr: adev->wb.used, size: adev->wb.num_wb);
1630	if (offset < adev->wb.num_wb) {
1631	__set_bit(offset, adev->wb.used);
1632	spin_unlock_irqrestore(lock: &adev->wb.lock, flags);
1633	wb = offset << `3`; /* convert to dw offset /
1634	return `0`;
1635	} else {
1636	spin_unlock_irqrestore(lock: &adev->wb.lock, flags);
1637	return -EINVAL;
1638	}
1639	}
1640
1641	/**
1642	* amdgpu_device_wb_free - Free a wb entry
1643	*
1644	* @adev: amdgpu_device pointer
1645	* @wb: wb index
1646	*
1647	* Free a wb slot allocated for use by the driver (all asics)
1648	*/
1649	void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb)
1650	{
1651	unsigned long flags;
1652
1653	wb >>= `3`;
1654	spin_lock_irqsave(&adev->wb.lock, flags);
1655	if (wb < adev->wb.num_wb)
1656	__clear_bit(wb, adev->wb.used);
1657	spin_unlock_irqrestore(lock: &adev->wb.lock, flags);
1658	}
1659
1660	/**
1661	* amdgpu_device_resize_fb_bar - try to resize FB BAR
1662	*
1663	* @adev: amdgpu_device pointer
1664	*
1665	* Try to resize FB BAR to make all VRAM CPU accessible. We try very hard not
1666	* to fail, but if any of the BARs is not accessible after the size we abort
1667	* driver loading by returning -ENODEV.
1668	*/
1669	int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev)
1670	{
1671	int rbar_size = pci_rebar_bytes_to_size(bytes: adev->gmc.real_vram_size);
1672	struct pci_bus *root;
1673	struct resource *res;
1674	unsigned int i;
1675	u16 cmd;
1676	int r;
1677
1678	if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
1679	return `0`;
1680
1681	/ Bypass for VF /
1682	if (amdgpu_sriov_vf(adev))
1683	return `0`;
1684
1685	if (!amdgpu_rebar)
1686	return `0`;
1687
1688	/ resizing on Dell G5 SE platforms causes problems with runtime pm /
1689	if ((amdgpu_runtime_pm != `0`) &&
1690	adev->pdev->vendor == PCI_VENDOR_ID_ATI &&
1691	adev->pdev->device == `0x731f` &&
1692	adev->pdev->subsystem_vendor == PCI_VENDOR_ID_DELL)
1693	return `0`;
1694
1695	/ PCI_EXT_CAP_ID_VNDR extended capability is located at 0x100 /
1696	if (!pci_find_ext_capability(dev: adev->pdev, PCI_EXT_CAP_ID_VNDR))
1697	DRM_WARN("System can't access extended configuration space, please check!!\n");
1698
1699	/ skip if the bios has already enabled large BAR /
1700	if (adev->gmc.real_vram_size &&
1701	(pci_resource_len(adev->pdev, `0`) >= adev->gmc.real_vram_size))
1702	return `0`;
1703
1704	/ Check if the root BUS has 64bit memory resources /
1705	root = adev->pdev->bus;
1706	while (root->parent)
1707	root = root->parent;
1708
1709	pci_bus_for_each_resource(root, res, i) {
1710	if (res && res->flags & (IORESOURCE_MEM \| IORESOURCE_MEM_64) &&
1711	res->start > `0x100000000ull`)
1712	break;
1713	}
1714
1715	/ Trying to resize is pointless without a root hub window above 4GB /
1716	if (!res)
1717	return `0`;
1718
1719	/ Limit the BAR size to what is available /
1720	rbar_size = min(fls(pci_rebar_get_possible_sizes(adev->pdev, `0`)) - `1`,
1721	rbar_size);
1722
1723	/ Disable memory decoding while we change the BAR addresses and size /
1724	pci_read_config_word(dev: adev->pdev, PCI_COMMAND, val: &cmd);
1725	pci_write_config_word(dev: adev->pdev, PCI_COMMAND,
1726	val: cmd & ~PCI_COMMAND_MEMORY);
1727
1728	/ Free the VRAM and doorbell BAR, we most likely need to move both. /
1729	amdgpu_doorbell_fini(adev);
1730	if (adev->asic_type >= CHIP_BONAIRE)
1731	pci_release_resource(dev: adev->pdev, resno: `2`);
1732
1733	pci_release_resource(dev: adev->pdev, resno: `0`);
1734
1735	r = pci_resize_resource(dev: adev->pdev, i: `0`, size: rbar_size);
1736	if (r == -ENOSPC)
1737	DRM_INFO("Not enough PCI address space for a large BAR.");
1738	else if (r && r != -ENOTSUPP)
1739	DRM_ERROR("Problem resizing BAR0 (%d).", r);
1740
1741	pci_assign_unassigned_bus_resources(bus: adev->pdev->bus);
1742
1743	/ When the doorbell or fb BAR isn't available we have no chance of*
1744	* using the device.
1745	*/
1746	r = amdgpu_doorbell_init(adev);
1747	if (r \|\| (pci_resource_flags(adev->pdev, `0`) & IORESOURCE_UNSET))
1748	return -ENODEV;
1749
1750	pci_write_config_word(dev: adev->pdev, PCI_COMMAND, val: cmd);
1751
1752	return `0`;
1753	}
1754
1755	/*
1756	* GPU helpers function.
1757	*/
1758	/**
1759	* amdgpu_device_need_post - check if the hw need post or not
1760	*
1761	* @adev: amdgpu_device pointer
1762	*
1763	* Check if the asic has been initialized (all asics) at driver startup
1764	* or post is needed if hw reset is performed.
1765	* Returns true if need or false if not.
1766	*/
1767	bool amdgpu_device_need_post(struct amdgpu_device *adev)
1768	{
1769	uint32_t reg, flags;
1770
1771	if (amdgpu_sriov_vf(adev))
1772	return false;
1773
1774	flags = amdgpu_device_get_vbios_flags(adev);
1775	if (flags & AMDGPU_VBIOS_SKIP)
1776	return false;
1777	if ((flags & AMDGPU_VBIOS_OPTIONAL) && !adev->bios)
1778	return false;
1779
1780	if (amdgpu_passthrough(adev)) {
1781	/ for FIJI: In whole GPU pass-through virtualization case, after VM reboot*
1782	* some old smc fw still need driver do vPost otherwise gpu hang, while
1783	* those smc fw version above 22.15 doesn't have this flaw, so we force
1784	* vpost executed for smc version below 22.15
1785	*/
1786	if (adev->asic_type == CHIP_FIJI) {
1787	int err;
1788	uint32_t fw_ver;
1789
1790	err = request_firmware(fw: &adev->pm.fw, name: "amdgpu/fiji_smc.bin", device: adev->dev);
1791	/ force vPost if error occurred /
1792	if (err)
1793	return true;
1794
1795	fw_ver = ((uint32_t )adev->pm.fw->data + `69`);
1796	release_firmware(fw: adev->pm.fw);
1797	if (fw_ver < `0x00160e00`)
1798	return true;
1799	}
1800	}
1801
1802	/ Don't post if we need to reset whole hive on init /
1803	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
1804	return false;
1805
1806	if (adev->has_hw_reset) {
1807	adev->has_hw_reset = false;
1808	return true;
1809	}
1810
1811	/ bios scratch used on CIK+ /
1812	if (adev->asic_type >= CHIP_BONAIRE)
1813	return amdgpu_atombios_scratch_need_asic_init(adev);
1814
1815	/ check MEM_SIZE for older asics /
1816	reg = amdgpu_asic_get_config_memsize(adev);
1817
1818	if ((reg != `0`) && (reg != `0xffffffff`))
1819	return false;
1820
1821	return true;
1822	}
1823
1824	/*
1825	* Check whether seamless boot is supported.
1826	*
1827	* So far we only support seamless boot on DCE 3.0 or later.
1828	* If users report that it works on older ASICS as well, we may
1829	* loosen this.
1830	*/
1831	bool amdgpu_device_seamless_boot_supported(struct amdgpu_device *adev)
1832	{
1833	switch (amdgpu_seamless) {
1834	case -`1`:
1835	break;
1836	case `1`:
1837	return true;
1838	case `0`:
1839	return false;
1840	default:
1841	DRM_ERROR("Invalid value for amdgpu.seamless: %d\n",
1842	amdgpu_seamless);
1843	return false;
1844	}
1845
1846	if (!(adev->flags & AMD_IS_APU))
1847	return false;
1848
1849	if (adev->mman.keep_stolen_vga_memory)
1850	return false;
1851
1852	return amdgpu_ip_version(adev, ip: DCE_HWIP, inst: `0`) >= IP_VERSION(`3`, `0`, `0`);
1853	}
1854
1855	/*
1856	* Intel hosts such as Rocket Lake, Alder Lake, Raptor Lake and Sapphire Rapids
1857	* don't support dynamic speed switching. Until we have confirmation from Intel
1858	* that a specific host supports it, it's safer that we keep it disabled for all.
1859	*
1860	* https://edc.intel.com/content/www/us/en/design/products/platforms/details/raptor-lake-s/13th-generation-core-processors-datasheet-volume-1-of-2/005/pci-express-support/
1861	* https://gitlab.freedesktop.org/drm/amd/-/issues/2663
1862	*/
1863	static bool amdgpu_device_pcie_dynamic_switching_supported(struct amdgpu_device *adev)
1864	{
1865	#if IS_ENABLED(CONFIG_X86)
1866	struct cpuinfo_x86 *c = &cpu_data(`0`);
1867
1868	/ eGPU change speeds based on USB4 fabric conditions /
1869	if (dev_is_removable(dev: adev->dev))
1870	return true;
1871
1872	if (c->x86_vendor == X86_VENDOR_INTEL)
1873	return false;
1874	#endif
1875	return true;
1876	}
1877
1878	static bool amdgpu_device_aspm_support_quirk(struct amdgpu_device *adev)
1879	{
1880	#if IS_ENABLED(CONFIG_X86)
1881	struct cpuinfo_x86 *c = &cpu_data(`0`);
1882
1883	if (!(amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`12`, `0`, `0`) \|\|
1884	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`12`, `0`, `1`)))
1885	return false;
1886
1887	if (c->x86 == `6` &&
1888	adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5) {
1889	switch (c->x86_model) {
1890	case VFM_MODEL(INTEL_ALDERLAKE):
1891	case VFM_MODEL(INTEL_ALDERLAKE_L):
1892	case VFM_MODEL(INTEL_RAPTORLAKE):
1893	case VFM_MODEL(INTEL_RAPTORLAKE_P):
1894	case VFM_MODEL(INTEL_RAPTORLAKE_S):
1895	return true;
1896	default:
1897	return false;
1898	}
1899	} else {
1900	return false;
1901	}
1902	#else
1903	return false;
1904	#endif
1905	}
1906
1907	/**
1908	* amdgpu_device_should_use_aspm - check if the device should program ASPM
1909	*
1910	* @adev: amdgpu_device pointer
1911	*
1912	* Confirm whether the module parameter and pcie bridge agree that ASPM should
1913	* be set for this device.
1914	*
1915	* Returns true if it should be used or false if not.
1916	*/
1917	bool amdgpu_device_should_use_aspm(struct amdgpu_device *adev)
1918	{
1919	switch (amdgpu_aspm) {
1920	case -`1`:
1921	break;
1922	case `0`:
1923	return false;
1924	case `1`:
1925	return true;
1926	default:
1927	return false;
1928	}
1929	if (adev->flags & AMD_IS_APU)
1930	return false;
1931	if (amdgpu_device_aspm_support_quirk(adev))
1932	return false;
1933	return pcie_aspm_enabled(pdev: adev->pdev);
1934	}
1935
1936	/ if we get transitioned to only one device, take VGA back /
1937	/**
1938	* amdgpu_device_vga_set_decode - enable/disable vga decode
1939	*
1940	* @pdev: PCI device pointer
1941	* @state: enable/disable vga decode
1942	*
1943	* Enable/disable vga decode (all asics).
1944	* Returns VGA resource flags.
1945	*/
1946	static unsigned int amdgpu_device_vga_set_decode(struct pci_dev *pdev,
1947	bool state)
1948	{
1949	struct amdgpu_device *adev = drm_to_adev(ddev: pci_get_drvdata(pdev));
1950
1951	amdgpu_asic_set_vga_state(adev, state);
1952	if (state)
1953	return VGA_RSRC_LEGACY_IO \| VGA_RSRC_LEGACY_MEM \|
1954	VGA_RSRC_NORMAL_IO \| VGA_RSRC_NORMAL_MEM;
1955	else
1956	return VGA_RSRC_NORMAL_IO \| VGA_RSRC_NORMAL_MEM;
1957	}
1958
1959	/**
1960	* amdgpu_device_check_block_size - validate the vm block size
1961	*
1962	* @adev: amdgpu_device pointer
1963	*
1964	* Validates the vm block size specified via module parameter.
1965	* The vm block size defines number of bits in page table versus page directory,
1966	* a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1967	* page table and the remaining bits are in the page directory.
1968	*/
1969	static void amdgpu_device_check_block_size(struct amdgpu_device *adev)
1970	{
1971	/ defines number of bits in page table versus page directory,*
1972	* a page is 4KB so we have 12 bits offset, minimum 9 bits in the
1973	* page table and the remaining bits are in the page directory
1974	*/
1975	if (amdgpu_vm_block_size == -`1`)
1976	return;
1977
1978	if (amdgpu_vm_block_size < `9`) {
1979	dev_warn(adev->dev, "VM page table size (%d) too small\n",
1980	amdgpu_vm_block_size);
1981	amdgpu_vm_block_size = -`1`;
1982	}
1983	}
1984
1985	/**
1986	* amdgpu_device_check_vm_size - validate the vm size
1987	*
1988	* @adev: amdgpu_device pointer
1989	*
1990	* Validates the vm size in GB specified via module parameter.
1991	* The VM size is the size of the GPU virtual memory space in GB.
1992	*/
1993	static void amdgpu_device_check_vm_size(struct amdgpu_device *adev)
1994	{
1995	/ no need to check the default value /
1996	if (amdgpu_vm_size == -`1`)
1997	return;
1998
1999	if (amdgpu_vm_size < `1`) {
2000	dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
2001	amdgpu_vm_size);
2002	amdgpu_vm_size = -`1`;
2003	}
2004	}
2005
2006	static void amdgpu_device_check_smu_prv_buffer_size(struct amdgpu_device *adev)
2007	{
2008	struct sysinfo si;
2009	bool is_os_64 = (sizeof(void *) == `8`);
2010	uint64_t total_memory;
2011	uint64_t dram_size_seven_GB = `0x1B8000000`;
2012	uint64_t dram_size_three_GB = `0xB8000000`;
2013
2014	if (amdgpu_smu_memory_pool_size == `0`)
2015	return;
2016
2017	if (!is_os_64) {
2018	DRM_WARN("Not 64-bit OS, feature not supported\n");
2019	goto def_value;
2020	}
2021	si_meminfo(val: &si);
2022	total_memory = (uint64_t)si.totalram * si.mem_unit;
2023
2024	if ((amdgpu_smu_memory_pool_size == `1`) \|\|
2025	(amdgpu_smu_memory_pool_size == `2`)) {
2026	if (total_memory < dram_size_three_GB)
2027	goto def_value1;
2028	} else if ((amdgpu_smu_memory_pool_size == `4`) \|\|
2029	(amdgpu_smu_memory_pool_size == `8`)) {
2030	if (total_memory < dram_size_seven_GB)
2031	goto def_value1;
2032	} else {
2033	DRM_WARN("Smu memory pool size not supported\n");
2034	goto def_value;
2035	}
2036	adev->pm.smu_prv_buffer_size = amdgpu_smu_memory_pool_size << `28`;
2037
2038	return;
2039
2040	def_value1:
2041	DRM_WARN("No enough system memory\n");
2042	def_value:
2043	adev->pm.smu_prv_buffer_size = `0`;
2044	}
2045
2046	static int amdgpu_device_init_apu_flags(struct amdgpu_device *adev)
2047	{
2048	if (!(adev->flags & AMD_IS_APU) \|\|
2049	adev->asic_type < CHIP_RAVEN)
2050	return `0`;
2051
2052	switch (adev->asic_type) {
2053	case CHIP_RAVEN:
2054	if (adev->pdev->device == `0x15dd`)
2055	adev->apu_flags \|= AMD_APU_IS_RAVEN;
2056	if (adev->pdev->device == `0x15d8`)
2057	adev->apu_flags \|= AMD_APU_IS_PICASSO;
2058	break;
2059	case CHIP_RENOIR:
2060	if ((adev->pdev->device == `0x1636`) \|\|
2061	(adev->pdev->device == `0x164c`))
2062	adev->apu_flags \|= AMD_APU_IS_RENOIR;
2063	else
2064	adev->apu_flags \|= AMD_APU_IS_GREEN_SARDINE;
2065	break;
2066	case CHIP_VANGOGH:
2067	adev->apu_flags \|= AMD_APU_IS_VANGOGH;
2068	break;
2069	case CHIP_YELLOW_CARP:
2070	break;
2071	case CHIP_CYAN_SKILLFISH:
2072	if ((adev->pdev->device == `0x13FE`) \|\|
2073	(adev->pdev->device == `0x143F`))
2074	adev->apu_flags \|= AMD_APU_IS_CYAN_SKILLFISH2;
2075	break;
2076	default:
2077	break;
2078	}
2079
2080	return `0`;
2081	}
2082
2083	/**
2084	* amdgpu_device_check_arguments - validate module params
2085	*
2086	* @adev: amdgpu_device pointer
2087	*
2088	* Validates certain module parameters and updates
2089	* the associated values used by the driver (all asics).
2090	*/
2091	static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
2092	{
2093	int i;
2094
2095	if (amdgpu_sched_jobs < `4`) {
2096	dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
2097	amdgpu_sched_jobs);
2098	amdgpu_sched_jobs = `4`;
2099	} else if (!is_power_of_2(n: amdgpu_sched_jobs)) {
2100	dev_warn(adev->dev, "sched jobs (%d) must be a power of 2\n",
2101	amdgpu_sched_jobs);
2102	amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
2103	}
2104
2105	if (amdgpu_gart_size != -`1` && amdgpu_gart_size < `32`) {
2106	/ gart size must be greater or equal to 32M /
2107	dev_warn(adev->dev, "gart size (%d) too small\n",
2108	amdgpu_gart_size);
2109	amdgpu_gart_size = -`1`;
2110	}
2111
2112	if (amdgpu_gtt_size != -`1` && amdgpu_gtt_size < `32`) {
2113	/ gtt size must be greater or equal to 32M /
2114	dev_warn(adev->dev, "gtt size (%d) too small\n",
2115	amdgpu_gtt_size);
2116	amdgpu_gtt_size = -`1`;
2117	}
2118
2119	/ valid range is between 4 and 9 inclusive /
2120	if (amdgpu_vm_fragment_size != -`1` &&
2121	(amdgpu_vm_fragment_size > `9` \|\| amdgpu_vm_fragment_size < `4`)) {
2122	dev_warn(adev->dev, "valid range is between 4 and 9\n");
2123	amdgpu_vm_fragment_size = -`1`;
2124	}
2125
2126	if (amdgpu_sched_hw_submission < `2`) {
2127	dev_warn(adev->dev, "sched hw submission jobs (%d) must be at least 2\n",
2128	amdgpu_sched_hw_submission);
2129	amdgpu_sched_hw_submission = `2`;
2130	} else if (!is_power_of_2(n: amdgpu_sched_hw_submission)) {
2131	dev_warn(adev->dev, "sched hw submission jobs (%d) must be a power of 2\n",
2132	amdgpu_sched_hw_submission);
2133	amdgpu_sched_hw_submission = roundup_pow_of_two(amdgpu_sched_hw_submission);
2134	}
2135
2136	if (amdgpu_reset_method < -`1` \|\| amdgpu_reset_method > `4`) {
2137	dev_warn(adev->dev, "invalid option for reset method, reverting to default\n");
2138	amdgpu_reset_method = -`1`;
2139	}
2140
2141	amdgpu_device_check_smu_prv_buffer_size(adev);
2142
2143	amdgpu_device_check_vm_size(adev);
2144
2145	amdgpu_device_check_block_size(adev);
2146
2147	adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, load_type: amdgpu_fw_load_type);
2148
2149	for (i = `0`; i < MAX_XCP; i++) {
2150	switch (amdgpu_enforce_isolation) {
2151	case -`1`:
2152	case `0`:
2153	default:
2154	/ disable /
2155	adev->enforce_isolation[i] = AMDGPU_ENFORCE_ISOLATION_DISABLE;
2156	break;
2157	case `1`:
2158	/ enable /
2159	adev->enforce_isolation[i] =
2160	AMDGPU_ENFORCE_ISOLATION_ENABLE;
2161	break;
2162	case `2`:
2163	/ enable legacy mode /
2164	adev->enforce_isolation[i] =
2165	AMDGPU_ENFORCE_ISOLATION_ENABLE_LEGACY;
2166	break;
2167	case `3`:
2168	/ enable only process isolation without submitting cleaner shader /
2169	adev->enforce_isolation[i] =
2170	AMDGPU_ENFORCE_ISOLATION_NO_CLEANER_SHADER;
2171	break;
2172	}
2173	}
2174
2175	return `0`;
2176	}
2177
2178	/**
2179	* amdgpu_switcheroo_set_state - set switcheroo state
2180	*
2181	* @pdev: pci dev pointer
2182	* @state: vga_switcheroo state
2183	*
2184	* Callback for the switcheroo driver. Suspends or resumes
2185	* the asics before or after it is powered up using ACPI methods.
2186	*/
2187	static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
2188	enum vga_switcheroo_state state)
2189	{
2190	struct drm_device *dev = pci_get_drvdata(pdev);
2191	int r;
2192
2193	if (amdgpu_device_supports_px(dev) && state == VGA_SWITCHEROO_OFF)
2194	return;
2195
2196	if (state == VGA_SWITCHEROO_ON) {
2197	pr_info("switched on\n");
2198	/ don't suspend or resume card normally /
2199	dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2200
2201	pci_set_power_state(dev: pdev, PCI_D0);
2202	amdgpu_device_load_pci_state(pdev);
2203	r = pci_enable_device(dev: pdev);
2204	if (r)
2205	DRM_WARN("pci_enable_device failed (%d)\n", r);
2206	amdgpu_device_resume(dev, fbcon: true);
2207
2208	dev->switch_power_state = DRM_SWITCH_POWER_ON;
2209	} else {
2210	pr_info("switched off\n");
2211	dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;
2212	amdgpu_device_prepare(dev);
2213	amdgpu_device_suspend(dev, fbcon: true);
2214	amdgpu_device_cache_pci_state(pdev);
2215	/ Shut down the device /
2216	pci_disable_device(dev: pdev);
2217	pci_set_power_state(dev: pdev, PCI_D3cold);
2218	dev->switch_power_state = DRM_SWITCH_POWER_OFF;
2219	}
2220	}
2221
2222	/**
2223	* amdgpu_switcheroo_can_switch - see if switcheroo state can change
2224	*
2225	* @pdev: pci dev pointer
2226	*
2227	* Callback for the switcheroo driver. Check of the switcheroo
2228	* state can be changed.
2229	* Returns true if the state can be changed, false if not.
2230	*/
2231	static bool amdgpu_switcheroo_can_switch(struct pci_dev *pdev)
2232	{
2233	struct drm_device *dev = pci_get_drvdata(pdev);
2234
2235	/*
2236	* FIXME: open_count is protected by drm_global_mutex but that would lead to
2237	* locking inversion with the driver load path. And the access here is
2238	* completely racy anyway. So don't bother with locking for now.
2239	*/
2240	return atomic_read(v: &dev->open_count) == `0`;
2241	}
2242
2243	static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
2244	.set_gpu_state = amdgpu_switcheroo_set_state,
2245	.reprobe = NULL,
2246	.can_switch = amdgpu_switcheroo_can_switch,
2247	};
2248
2249	/**
2250	* amdgpu_device_ip_set_clockgating_state - set the CG state
2251	*
2252	* @dev: amdgpu_device pointer
2253	* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2254	* @state: clockgating state (gate or ungate)
2255	*
2256	* Sets the requested clockgating state for all instances of
2257	* the hardware IP specified.
2258	* Returns the error code from the last instance.
2259	*/
2260	int amdgpu_device_ip_set_clockgating_state(void *dev,
2261	enum amd_ip_block_type block_type,
2262	enum amd_clockgating_state state)
2263	{
2264	struct amdgpu_device *adev = dev;
2265	int i, r = `0`;
2266
2267	for (i = `0`; i < adev->num_ip_blocks; i++) {
2268	if (!adev->ip_blocks[i].status.valid)
2269	continue;
2270	if (adev->ip_blocks[i].version->type != block_type)
2271	continue;
2272	if (!adev->ip_blocks[i].version->funcs->set_clockgating_state)
2273	continue;
2274	r = adev->ip_blocks[i].version->funcs->set_clockgating_state(
2275	&adev->ip_blocks[i], state);
2276	if (r)
2277	DRM_ERROR("set_clockgating_state of IP block <%s> failed %d\n",
2278	adev->ip_blocks[i].version->funcs->name, r);
2279	}
2280	return r;
2281	}
2282
2283	/**
2284	* amdgpu_device_ip_set_powergating_state - set the PG state
2285	*
2286	* @dev: amdgpu_device pointer
2287	* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2288	* @state: powergating state (gate or ungate)
2289	*
2290	* Sets the requested powergating state for all instances of
2291	* the hardware IP specified.
2292	* Returns the error code from the last instance.
2293	*/
2294	int amdgpu_device_ip_set_powergating_state(void *dev,
2295	enum amd_ip_block_type block_type,
2296	enum amd_powergating_state state)
2297	{
2298	struct amdgpu_device *adev = dev;
2299	int i, r = `0`;
2300
2301	for (i = `0`; i < adev->num_ip_blocks; i++) {
2302	if (!adev->ip_blocks[i].status.valid)
2303	continue;
2304	if (adev->ip_blocks[i].version->type != block_type)
2305	continue;
2306	if (!adev->ip_blocks[i].version->funcs->set_powergating_state)
2307	continue;
2308	r = adev->ip_blocks[i].version->funcs->set_powergating_state(
2309	&adev->ip_blocks[i], state);
2310	if (r)
2311	DRM_ERROR("set_powergating_state of IP block <%s> failed %d\n",
2312	adev->ip_blocks[i].version->funcs->name, r);
2313	}
2314	return r;
2315	}
2316
2317	/**
2318	* amdgpu_device_ip_get_clockgating_state - get the CG state
2319	*
2320	* @adev: amdgpu_device pointer
2321	* @flags: clockgating feature flags
2322	*
2323	* Walks the list of IPs on the device and updates the clockgating
2324	* flags for each IP.
2325	* Updates @flags with the feature flags for each hardware IP where
2326	* clockgating is enabled.
2327	*/
2328	void amdgpu_device_ip_get_clockgating_state(struct amdgpu_device *adev,
2329	u64 *flags)
2330	{
2331	int i;
2332
2333	for (i = `0`; i < adev->num_ip_blocks; i++) {
2334	if (!adev->ip_blocks[i].status.valid)
2335	continue;
2336	if (adev->ip_blocks[i].version->funcs->get_clockgating_state)
2337	adev->ip_blocks[i].version->funcs->get_clockgating_state(
2338	&adev->ip_blocks[i], flags);
2339	}
2340	}
2341
2342	/**
2343	* amdgpu_device_ip_wait_for_idle - wait for idle
2344	*
2345	* @adev: amdgpu_device pointer
2346	* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2347	*
2348	* Waits for the request hardware IP to be idle.
2349	* Returns 0 for success or a negative error code on failure.
2350	*/
2351	int amdgpu_device_ip_wait_for_idle(struct amdgpu_device *adev,
2352	enum amd_ip_block_type block_type)
2353	{
2354	int i, r;
2355
2356	for (i = `0`; i < adev->num_ip_blocks; i++) {
2357	if (!adev->ip_blocks[i].status.valid)
2358	continue;
2359	if (adev->ip_blocks[i].version->type == block_type) {
2360	if (adev->ip_blocks[i].version->funcs->wait_for_idle) {
2361	r = adev->ip_blocks[i].version->funcs->wait_for_idle(
2362	&adev->ip_blocks[i]);
2363	if (r)
2364	return r;
2365	}
2366	break;
2367	}
2368	}
2369	return `0`;
2370
2371	}
2372
2373	/**
2374	* amdgpu_device_ip_is_valid - is the hardware IP enabled
2375	*
2376	* @adev: amdgpu_device pointer
2377	* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
2378	*
2379	* Check if the hardware IP is enable or not.
2380	* Returns true if it the IP is enable, false if not.
2381	*/
2382	bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
2383	enum amd_ip_block_type block_type)
2384	{
2385	int i;
2386
2387	for (i = `0`; i < adev->num_ip_blocks; i++) {
2388	if (adev->ip_blocks[i].version->type == block_type)
2389	return adev->ip_blocks[i].status.valid;
2390	}
2391	return false;
2392
2393	}
2394
2395	/**
2396	* amdgpu_device_ip_get_ip_block - get a hw IP pointer
2397	*
2398	* @adev: amdgpu_device pointer
2399	* @type: Type of hardware IP (SMU, GFX, UVD, etc.)
2400	*
2401	* Returns a pointer to the hardware IP block structure
2402	* if it exists for the asic, otherwise NULL.
2403	*/
2404	struct amdgpu_ip_block *
2405	amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
2406	enum amd_ip_block_type type)
2407	{
2408	int i;
2409
2410	for (i = `0`; i < adev->num_ip_blocks; i++)
2411	if (adev->ip_blocks[i].version->type == type)
2412	return &adev->ip_blocks[i];
2413
2414	return NULL;
2415	}
2416
2417	/**
2418	* amdgpu_device_ip_block_version_cmp
2419	*
2420	* @adev: amdgpu_device pointer
2421	* @type: enum amd_ip_block_type
2422	* @major: major version
2423	* @minor: minor version
2424	*
2425	* return 0 if equal or greater
2426	* return 1 if smaller or the ip_block doesn't exist
2427	*/
2428	int amdgpu_device_ip_block_version_cmp(struct amdgpu_device *adev,
2429	enum amd_ip_block_type type,
2430	u32 major, u32 minor)
2431	{
2432	struct amdgpu_ip_block *ip_block = amdgpu_device_ip_get_ip_block(adev, type);
2433
2434	if (ip_block && ((ip_block->version->major > major) \|\|
2435	((ip_block->version->major == major) &&
2436	(ip_block->version->minor >= minor))))
2437	return `0`;
2438
2439	return `1`;
2440	}
2441
2442	/**
2443	* amdgpu_device_ip_block_add
2444	*
2445	* @adev: amdgpu_device pointer
2446	* @ip_block_version: pointer to the IP to add
2447	*
2448	* Adds the IP block driver information to the collection of IPs
2449	* on the asic.
2450	*/
2451	int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
2452	const struct amdgpu_ip_block_version *ip_block_version)
2453	{
2454	if (!ip_block_version)
2455	return -EINVAL;
2456
2457	switch (ip_block_version->type) {
2458	case AMD_IP_BLOCK_TYPE_VCN:
2459	if (adev->harvest_ip_mask & AMD_HARVEST_IP_VCN_MASK)
2460	return `0`;
2461	break;
2462	case AMD_IP_BLOCK_TYPE_JPEG:
2463	if (adev->harvest_ip_mask & AMD_HARVEST_IP_JPEG_MASK)
2464	return `0`;
2465	break;
2466	default:
2467	break;
2468	}
2469
2470	dev_info(adev->dev, "detected ip block number %d <%s>\n",
2471	adev->num_ip_blocks, ip_block_version->funcs->name);
2472
2473	adev->ip_blocks[adev->num_ip_blocks].adev = adev;
2474
2475	adev->ip_blocks[adev->num_ip_blocks++].version = ip_block_version;
2476
2477	return `0`;
2478	}
2479
2480	/**
2481	* amdgpu_device_enable_virtual_display - enable virtual display feature
2482	*
2483	* @adev: amdgpu_device pointer
2484	*
2485	* Enabled the virtual display feature if the user has enabled it via
2486	* the module parameter virtual_display. This feature provides a virtual
2487	* display hardware on headless boards or in virtualized environments.
2488	* This function parses and validates the configuration string specified by
2489	* the user and configures the virtual display configuration (number of
2490	* virtual connectors, crtcs, etc.) specified.
2491	*/
2492	static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
2493	{
2494	adev->enable_virtual_display = false;
2495
2496	if (amdgpu_virtual_display) {
2497	const char *pci_address_name = pci_name(pdev: adev->pdev);
2498	char pciaddstr, pciaddstr_tmp, pciaddname_tmp, pciaddname;
2499
2500	pciaddstr = kstrdup(s: amdgpu_virtual_display, GFP_KERNEL);
2501	pciaddstr_tmp = pciaddstr;
2502	while ((pciaddname_tmp = strsep(&pciaddstr_tmp, ";"))) {
2503	pciaddname = strsep(&pciaddname_tmp, ",");
2504	if (!strcmp("all", pciaddname)
2505	\|\| !strcmp(pci_address_name, pciaddname)) {
2506	long num_crtc;
2507	int res = -`1`;
2508
2509	adev->enable_virtual_display = true;
2510
2511	if (pciaddname_tmp)
2512	res = kstrtol(s: pciaddname_tmp, base: `10`,
2513	res: &num_crtc);
2514
2515	if (!res) {
2516	if (num_crtc < `1`)
2517	num_crtc = `1`;
2518	if (num_crtc > `6`)
2519	num_crtc = `6`;
2520	adev->mode_info.num_crtc = num_crtc;
2521	} else {
2522	adev->mode_info.num_crtc = `1`;
2523	}
2524	break;
2525	}
2526	}
2527
2528	DRM_INFO("virtual display string:%s, %s:virtual_display:%d, num_crtc:%d\n",
2529	amdgpu_virtual_display, pci_address_name,
2530	adev->enable_virtual_display, adev->mode_info.num_crtc);
2531
2532	kfree(objp: pciaddstr);
2533	}
2534	}
2535
2536	void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
2537	{
2538	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
2539	adev->mode_info.num_crtc = `1`;
2540	adev->enable_virtual_display = true;
2541	DRM_INFO("virtual_display:%d, num_crtc:%d\n",
2542	adev->enable_virtual_display, adev->mode_info.num_crtc);
2543	}
2544	}
2545
2546	/**
2547	* amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
2548	*
2549	* @adev: amdgpu_device pointer
2550	*
2551	* Parses the asic configuration parameters specified in the gpu info
2552	* firmware and makes them available to the driver for use in configuring
2553	* the asic.
2554	* Returns 0 on success, -EINVAL on failure.
2555	*/
2556	static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
2557	{
2558	const char *chip_name;
2559	int err;
2560	const struct gpu_info_firmware_header_v1_0 *hdr;
2561
2562	adev->firmware.gpu_info_fw = NULL;
2563
2564	if (adev->mman.discovery_bin)
2565	return `0`;
2566
2567	switch (adev->asic_type) {
2568	default:
2569	return `0`;
2570	case CHIP_VEGA10:
2571	chip_name = "vega10";
2572	break;
2573	case CHIP_VEGA12:
2574	chip_name = "vega12";
2575	break;
2576	case CHIP_RAVEN:
2577	if (adev->apu_flags & AMD_APU_IS_RAVEN2)
2578	chip_name = "raven2";
2579	else if (adev->apu_flags & AMD_APU_IS_PICASSO)
2580	chip_name = "picasso";
2581	else
2582	chip_name = "raven";
2583	break;
2584	case CHIP_ARCTURUS:
2585	chip_name = "arcturus";
2586	break;
2587	case CHIP_NAVI12:
2588	chip_name = "navi12";
2589	break;
2590	}
2591
2592	err = amdgpu_ucode_request(adev, fw: &adev->firmware.gpu_info_fw,
2593	required: AMDGPU_UCODE_OPTIONAL,
2594	fmt: "amdgpu/%s_gpu_info.bin", chip_name);
2595	if (err) {
2596	dev_err(adev->dev,
2597	"Failed to get gpu_info firmware \"%s_gpu_info.bin\"\n",
2598	chip_name);
2599	goto out;
2600	}
2601
2602	hdr = (const struct gpu_info_firmware_header_v1_0 *)adev->firmware.gpu_info_fw->data;
2603	amdgpu_ucode_print_gpu_info_hdr(hdr: &hdr->header);
2604
2605	switch (hdr->version_major) {
2606	case `1`:
2607	{
2608	const struct gpu_info_firmware_v1_0 *gpu_info_fw =
2609	(const struct gpu_info_firmware_v1_0 *)(adev->firmware.gpu_info_fw->data +
2610	le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2611
2612	/*
2613	* Should be dropped when DAL no longer needs it.
2614	*/
2615	if (adev->asic_type == CHIP_NAVI12)
2616	goto parse_soc_bounding_box;
2617
2618	adev->gfx.config.max_shader_engines = le32_to_cpu(gpu_info_fw->gc_num_se);
2619	adev->gfx.config.max_cu_per_sh = le32_to_cpu(gpu_info_fw->gc_num_cu_per_sh);
2620	adev->gfx.config.max_sh_per_se = le32_to_cpu(gpu_info_fw->gc_num_sh_per_se);
2621	adev->gfx.config.max_backends_per_se = le32_to_cpu(gpu_info_fw->gc_num_rb_per_se);
2622	adev->gfx.config.max_texture_channel_caches =
2623	le32_to_cpu(gpu_info_fw->gc_num_tccs);
2624	adev->gfx.config.max_gprs = le32_to_cpu(gpu_info_fw->gc_num_gprs);
2625	adev->gfx.config.max_gs_threads = le32_to_cpu(gpu_info_fw->gc_num_max_gs_thds);
2626	adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gpu_info_fw->gc_gs_table_depth);
2627	adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gpu_info_fw->gc_gsprim_buff_depth);
2628	adev->gfx.config.double_offchip_lds_buf =
2629	le32_to_cpu(gpu_info_fw->gc_double_offchip_lds_buffer);
2630	adev->gfx.cu_info.wave_front_size = le32_to_cpu(gpu_info_fw->gc_wave_size);
2631	adev->gfx.cu_info.max_waves_per_simd =
2632	le32_to_cpu(gpu_info_fw->gc_max_waves_per_simd);
2633	adev->gfx.cu_info.max_scratch_slots_per_cu =
2634	le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
2635	adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
2636	if (hdr->version_minor >= `1`) {
2637	const struct gpu_info_firmware_v1_1 *gpu_info_fw =
2638	(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
2639	le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2640	adev->gfx.config.num_sc_per_sh =
2641	le32_to_cpu(gpu_info_fw->num_sc_per_sh);
2642	adev->gfx.config.num_packer_per_sc =
2643	le32_to_cpu(gpu_info_fw->num_packer_per_sc);
2644	}
2645
2646	parse_soc_bounding_box:
2647	/*
2648	* soc bounding box info is not integrated in disocovery table,
2649	* we always need to parse it from gpu info firmware if needed.
2650	*/
2651	if (hdr->version_minor == `2`) {
2652	const struct gpu_info_firmware_v1_2 *gpu_info_fw =
2653	(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
2654	le32_to_cpu(hdr->header.ucode_array_offset_bytes));
2655	adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
2656	}
2657	break;
2658	}
2659	default:
2660	dev_err(adev->dev,
2661	"Unsupported gpu_info table %d\n", hdr->header.ucode_version);
2662	err = -EINVAL;
2663	goto out;
2664	}
2665	out:
2666	return err;
2667	}
2668
2669	/**
2670	* amdgpu_device_ip_early_init - run early init for hardware IPs
2671	*
2672	* @adev: amdgpu_device pointer
2673	*
2674	* Early initialization pass for hardware IPs. The hardware IPs that make
2675	* up each asic are discovered each IP's early_init callback is run. This
2676	* is the first stage in initializing the asic.
2677	* Returns 0 on success, negative error code on failure.
2678	*/
2679	static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
2680	{
2681	struct amdgpu_ip_block *ip_block;
2682	struct pci_dev *parent;
2683	bool total, skip_bios;
2684	uint32_t bios_flags;
2685	int i, r;
2686
2687	amdgpu_device_enable_virtual_display(adev);
2688
2689	if (amdgpu_sriov_vf(adev)) {
2690	r = amdgpu_virt_request_full_gpu(adev, init: true);
2691	if (r)
2692	return r;
2693	}
2694
2695	switch (adev->asic_type) {
2696	#ifdef CONFIG_DRM_AMDGPU_SI
2697	case CHIP_VERDE:
2698	case CHIP_TAHITI:
2699	case CHIP_PITCAIRN:
2700	case CHIP_OLAND:
2701	case CHIP_HAINAN:
2702	adev->family = AMDGPU_FAMILY_SI;
2703	r = si_set_ip_blocks(adev);
2704	if (r)
2705	return r;
2706	break;
2707	#endif
2708	#ifdef CONFIG_DRM_AMDGPU_CIK
2709	case CHIP_BONAIRE:
2710	case CHIP_HAWAII:
2711	case CHIP_KAVERI:
2712	case CHIP_KABINI:
2713	case CHIP_MULLINS:
2714	if (adev->flags & AMD_IS_APU)
2715	adev->family = AMDGPU_FAMILY_KV;
2716	else
2717	adev->family = AMDGPU_FAMILY_CI;
2718
2719	r = cik_set_ip_blocks(adev);
2720	if (r)
2721	return r;
2722	break;
2723	#endif
2724	case CHIP_TOPAZ:
2725	case CHIP_TONGA:
2726	case CHIP_FIJI:
2727	case CHIP_POLARIS10:
2728	case CHIP_POLARIS11:
2729	case CHIP_POLARIS12:
2730	case CHIP_VEGAM:
2731	case CHIP_CARRIZO:
2732	case CHIP_STONEY:
2733	if (adev->flags & AMD_IS_APU)
2734	adev->family = AMDGPU_FAMILY_CZ;
2735	else
2736	adev->family = AMDGPU_FAMILY_VI;
2737
2738	r = vi_set_ip_blocks(adev);
2739	if (r)
2740	return r;
2741	break;
2742	default:
2743	r = amdgpu_discovery_set_ip_blocks(adev);
2744	if (r)
2745	return r;
2746	break;
2747	}
2748
2749	/ Check for IP version 9.4.3 with A0 hardware /
2750	if (amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `3`) &&
2751	!amdgpu_device_get_rev_id(adev)) {
2752	dev_err(adev->dev, "Unsupported A0 hardware\n");
2753	return -ENODEV; / device unsupported - no device error /
2754	}
2755
2756	if (amdgpu_has_atpx() &&
2757	(amdgpu_is_atpx_hybrid() \|\|
2758	amdgpu_has_atpx_dgpu_power_cntl()) &&
2759	((adev->flags & AMD_IS_APU) == `0`) &&
2760	!dev_is_removable(dev: &adev->pdev->dev))
2761	adev->flags \|= AMD_IS_PX;
2762
2763	if (!(adev->flags & AMD_IS_APU)) {
2764	parent = pcie_find_root_port(dev: adev->pdev);
2765	adev->has_pr3 = parent ? pci_pr3_present(pdev: parent) : false;
2766	}
2767
2768	adev->pm.pp_feature = amdgpu_pp_feature_mask;
2769	if (amdgpu_sriov_vf(adev) \|\| sched_policy == KFD_SCHED_POLICY_NO_HWS)
2770	adev->pm.pp_feature &= ~PP_GFXOFF_MASK;
2771	if (amdgpu_sriov_vf(adev) && adev->asic_type == CHIP_SIENNA_CICHLID)
2772	adev->pm.pp_feature &= ~PP_OVERDRIVE_MASK;
2773	if (!amdgpu_device_pcie_dynamic_switching_supported(adev))
2774	adev->pm.pp_feature &= ~PP_PCIE_DPM_MASK;
2775
2776	total = true;
2777	for (i = `0`; i < adev->num_ip_blocks; i++) {
2778	ip_block = &adev->ip_blocks[i];
2779
2780	if ((amdgpu_ip_block_mask & (`1` << i)) == `0`) {
2781	DRM_WARN("disabled ip block: %d <%s>\n",
2782	i, adev->ip_blocks[i].version->funcs->name);
2783	adev->ip_blocks[i].status.valid = false;
2784	} else if (ip_block->version->funcs->early_init) {
2785	r = ip_block->version->funcs->early_init(ip_block);
2786	if (r == -ENOENT) {
2787	adev->ip_blocks[i].status.valid = false;
2788	} else if (r) {
2789	DRM_ERROR("early_init of IP block <%s> failed %d\n",
2790	adev->ip_blocks[i].version->funcs->name, r);
2791	total = false;
2792	} else {
2793	adev->ip_blocks[i].status.valid = true;
2794	}
2795	} else {
2796	adev->ip_blocks[i].status.valid = true;
2797	}
2798	/ get the vbios after the asic_funcs are set up /
2799	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
2800	r = amdgpu_device_parse_gpu_info_fw(adev);
2801	if (r)
2802	return r;
2803
2804	bios_flags = amdgpu_device_get_vbios_flags(adev);
2805	skip_bios = !!(bios_flags & AMDGPU_VBIOS_SKIP);
2806	/ Read BIOS /
2807	if (!skip_bios) {
2808	bool optional =
2809	!!(bios_flags & AMDGPU_VBIOS_OPTIONAL);
2810	if (!amdgpu_get_bios(adev) && !optional)
2811	return -EINVAL;
2812
2813	if (optional && !adev->bios)
2814	dev_info(
2815	adev->dev,
2816	"VBIOS image optional, proceeding without VBIOS image");
2817
2818	if (adev->bios) {
2819	r = amdgpu_atombios_init(adev);
2820	if (r) {
2821	dev_err(adev->dev,
2822	"amdgpu_atombios_init failed\n");
2823	amdgpu_vf_error_put(
2824	adev,
2825	sub_error_code: AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
2826	error_flags: `0`, error_data: `0`);
2827	return r;
2828	}
2829	}
2830	}
2831
2832	/get pf2vf msg info at it's earliest time/
2833	if (amdgpu_sriov_vf(adev))
2834	amdgpu_virt_init_data_exchange(adev);
2835
2836	}
2837	}
2838	if (!total)
2839	return -ENODEV;
2840
2841	if (adev->gmc.xgmi.supported)
2842	amdgpu_xgmi_early_init(adev);
2843
2844	ip_block = amdgpu_device_ip_get_ip_block(adev, type: AMD_IP_BLOCK_TYPE_GFX);
2845	if (ip_block->status.valid != false)
2846	amdgpu_amdkfd_device_probe(adev);
2847
2848	adev->cg_flags &= amdgpu_cg_mask;
2849	adev->pg_flags &= amdgpu_pg_mask;
2850
2851	return `0`;
2852	}
2853
2854	static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
2855	{
2856	int i, r;
2857
2858	for (i = `0`; i < adev->num_ip_blocks; i++) {
2859	if (!adev->ip_blocks[i].status.sw)
2860	continue;
2861	if (adev->ip_blocks[i].status.hw)
2862	continue;
2863	if (!amdgpu_ip_member_of_hwini(
2864	adev, block: adev->ip_blocks[i].version->type))
2865	continue;
2866	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON \|\|
2867	(amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) \|\|
2868	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
2869	r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2870	if (r) {
2871	DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2872	adev->ip_blocks[i].version->funcs->name, r);
2873	return r;
2874	}
2875	adev->ip_blocks[i].status.hw = true;
2876	}
2877	}
2878
2879	return `0`;
2880	}
2881
2882	static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev)
2883	{
2884	int i, r;
2885
2886	for (i = `0`; i < adev->num_ip_blocks; i++) {
2887	if (!adev->ip_blocks[i].status.sw)
2888	continue;
2889	if (adev->ip_blocks[i].status.hw)
2890	continue;
2891	if (!amdgpu_ip_member_of_hwini(
2892	adev, block: adev->ip_blocks[i].version->type))
2893	continue;
2894	r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2895	if (r) {
2896	DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2897	adev->ip_blocks[i].version->funcs->name, r);
2898	return r;
2899	}
2900	adev->ip_blocks[i].status.hw = true;
2901	}
2902
2903	return `0`;
2904	}
2905
2906	static int amdgpu_device_fw_loading(struct amdgpu_device *adev)
2907	{
2908	int r = `0`;
2909	int i;
2910	uint32_t smu_version;
2911
2912	if (adev->asic_type >= CHIP_VEGA10) {
2913	for (i = `0`; i < adev->num_ip_blocks; i++) {
2914	if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP)
2915	continue;
2916
2917	if (!amdgpu_ip_member_of_hwini(adev,
2918	block: AMD_IP_BLOCK_TYPE_PSP))
2919	break;
2920
2921	if (!adev->ip_blocks[i].status.sw)
2922	continue;
2923
2924	/ no need to do the fw loading again if already done/
2925	if (adev->ip_blocks[i].status.hw == true)
2926	break;
2927
2928	if (amdgpu_in_reset(adev) \|\| adev->in_suspend) {
2929	r = amdgpu_ip_block_resume(ip_block: &adev->ip_blocks[i]);
2930	if (r)
2931	return r;
2932	} else {
2933	r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
2934	if (r) {
2935	DRM_ERROR("hw_init of IP block <%s> failed %d\n",
2936	adev->ip_blocks[i].version->funcs->name, r);
2937	return r;
2938	}
2939	adev->ip_blocks[i].status.hw = true;
2940	}
2941	break;
2942	}
2943	}
2944
2945	if (!amdgpu_sriov_vf(adev) \|\| adev->asic_type == CHIP_TONGA)
2946	r = amdgpu_pm_load_smu_firmware(adev, smu_version: &smu_version);
2947
2948	return r;
2949	}
2950
2951	static int amdgpu_device_init_schedulers(struct amdgpu_device *adev)
2952	{
2953	struct drm_sched_init_args args = {
2954	.ops = &amdgpu_sched_ops,
2955	.num_rqs = DRM_SCHED_PRIORITY_COUNT,
2956	.timeout_wq = adev->reset_domain->wq,
2957	.dev = adev->dev,
2958	};
2959	long timeout;
2960	int r, i;
2961
2962	for (i = `0`; i < AMDGPU_MAX_RINGS; ++i) {
2963	struct amdgpu_ring *ring = adev->rings[i];
2964
2965	/ No need to setup the GPU scheduler for rings that don't need it /
2966	if (!ring \|\| ring->no_scheduler)
2967	continue;
2968
2969	switch (ring->funcs->type) {
2970	case AMDGPU_RING_TYPE_GFX:
2971	timeout = adev->gfx_timeout;
2972	break;
2973	case AMDGPU_RING_TYPE_COMPUTE:
2974	timeout = adev->compute_timeout;
2975	break;
2976	case AMDGPU_RING_TYPE_SDMA:
2977	timeout = adev->sdma_timeout;
2978	break;
2979	default:
2980	timeout = adev->video_timeout;
2981	break;
2982	}
2983
2984	args.timeout = timeout;
2985	args.credit_limit = ring->num_hw_submission;
2986	args.score = ring->sched_score;
2987	args.name = ring->name;
2988
2989	r = drm_sched_init(sched: &ring->sched, args: &args);
2990	if (r) {
2991	DRM_ERROR("Failed to create scheduler on ring %s.\n",
2992	ring->name);
2993	return r;
2994	}
2995	r = amdgpu_uvd_entity_init(adev, ring);
2996	if (r) {
2997	DRM_ERROR("Failed to create UVD scheduling entity on ring %s.\n",
2998	ring->name);
2999	return r;
3000	}
3001	r = amdgpu_vce_entity_init(adev, ring);
3002	if (r) {
3003	DRM_ERROR("Failed to create VCE scheduling entity on ring %s.\n",
3004	ring->name);
3005	return r;
3006	}
3007	}
3008
3009	amdgpu_xcp_update_partition_sched_list(adev);
3010
3011	return `0`;
3012	}
3013
3014
3015	/**
3016	* amdgpu_device_ip_init - run init for hardware IPs
3017	*
3018	* @adev: amdgpu_device pointer
3019	*
3020	* Main initialization pass for hardware IPs. The list of all the hardware
3021	* IPs that make up the asic is walked and the sw_init and hw_init callbacks
3022	* are run. sw_init initializes the software state associated with each IP
3023	* and hw_init initializes the hardware associated with each IP.
3024	* Returns 0 on success, negative error code on failure.
3025	*/
3026	static int amdgpu_device_ip_init(struct amdgpu_device *adev)
3027	{
3028	bool init_badpage;
3029	int i, r;
3030
3031	r = amdgpu_ras_init(adev);
3032	if (r)
3033	return r;
3034
3035	for (i = `0`; i < adev->num_ip_blocks; i++) {
3036	if (!adev->ip_blocks[i].status.valid)
3037	continue;
3038	if (adev->ip_blocks[i].version->funcs->sw_init) {
3039	r = adev->ip_blocks[i].version->funcs->sw_init(&adev->ip_blocks[i]);
3040	if (r) {
3041	DRM_ERROR("sw_init of IP block <%s> failed %d\n",
3042	adev->ip_blocks[i].version->funcs->name, r);
3043	goto init_failed;
3044	}
3045	}
3046	adev->ip_blocks[i].status.sw = true;
3047
3048	if (!amdgpu_ip_member_of_hwini(
3049	adev, block: adev->ip_blocks[i].version->type))
3050	continue;
3051
3052	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
3053	/ need to do common hw init early so everything is set up for gmc /
3054	r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
3055	if (r) {
3056	DRM_ERROR("hw_init %d failed %d\n", i, r);
3057	goto init_failed;
3058	}
3059	adev->ip_blocks[i].status.hw = true;
3060	} else if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3061	/ need to do gmc hw init early so we can allocate gpu mem /
3062	/ Try to reserve bad pages early /
3063	if (amdgpu_sriov_vf(adev))
3064	amdgpu_virt_exchange_data(adev);
3065
3066	r = amdgpu_device_mem_scratch_init(adev);
3067	if (r) {
3068	DRM_ERROR("amdgpu_mem_scratch_init failed %d\n", r);
3069	goto init_failed;
3070	}
3071	r = adev->ip_blocks[i].version->funcs->hw_init(&adev->ip_blocks[i]);
3072	if (r) {
3073	DRM_ERROR("hw_init %d failed %d\n", i, r);
3074	goto init_failed;
3075	}
3076	r = amdgpu_device_wb_init(adev);
3077	if (r) {
3078	DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
3079	goto init_failed;
3080	}
3081	adev->ip_blocks[i].status.hw = true;
3082
3083	/ right after GMC hw init, we create CSA /
3084	if (adev->gfx.mcbp) {
3085	r = amdgpu_allocate_static_csa(adev, bo: &adev->virt.csa_obj,
3086	AMDGPU_GEM_DOMAIN_VRAM \|
3087	AMDGPU_GEM_DOMAIN_GTT,
3088	AMDGPU_CSA_SIZE);
3089	if (r) {
3090	DRM_ERROR("allocate CSA failed %d\n", r);
3091	goto init_failed;
3092	}
3093	}
3094
3095	r = amdgpu_seq64_init(adev);
3096	if (r) {
3097	DRM_ERROR("allocate seq64 failed %d\n", r);
3098	goto init_failed;
3099	}
3100	}
3101	}
3102
3103	if (amdgpu_sriov_vf(adev))
3104	amdgpu_virt_init_data_exchange(adev);
3105
3106	r = amdgpu_ib_pool_init(adev);
3107	if (r) {
3108	dev_err(adev->dev, "IB initialization failed (%d).\n", r);
3109	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_IB_INIT_FAIL, error_flags: `0`, error_data: r);
3110	goto init_failed;
3111	}
3112
3113	r = amdgpu_ucode_create_bo(adev); / create ucode bo when sw_init complete/
3114	if (r)
3115	goto init_failed;
3116
3117	r = amdgpu_device_ip_hw_init_phase1(adev);
3118	if (r)
3119	goto init_failed;
3120
3121	r = amdgpu_device_fw_loading(adev);
3122	if (r)
3123	goto init_failed;
3124
3125	r = amdgpu_device_ip_hw_init_phase2(adev);
3126	if (r)
3127	goto init_failed;
3128
3129	/*
3130	* retired pages will be loaded from eeprom and reserved here,
3131	* it should be called after amdgpu_device_ip_hw_init_phase2 since
3132	* for some ASICs the RAS EEPROM code relies on SMU fully functioning
3133	* for I2C communication which only true at this point.
3134	*
3135	* amdgpu_ras_recovery_init may fail, but the upper only cares the
3136	* failure from bad gpu situation and stop amdgpu init process
3137	* accordingly. For other failed cases, it will still release all
3138	* the resource and print error message, rather than returning one
3139	* negative value to upper level.
3140	*
3141	* Note: theoretically, this should be called before all vram allocations
3142	* to protect retired page from abusing
3143	*/
3144	init_badpage = (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
3145	r = amdgpu_ras_recovery_init(adev, init_bp_info: init_badpage);
3146	if (r)
3147	goto init_failed;
3148
3149	/**
3150	* In case of XGMI grab extra reference for reset domain for this device
3151	*/
3152	if (adev->gmc.xgmi.num_physical_nodes > `1`) {
3153	if (amdgpu_xgmi_add_device(adev) == `0`) {
3154	if (!amdgpu_sriov_vf(adev)) {
3155	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
3156
3157	if (WARN_ON(!hive)) {
3158	r = -ENOENT;
3159	goto init_failed;
3160	}
3161
3162	if (!hive->reset_domain \|\|
3163	!amdgpu_reset_get_reset_domain(domain: hive->reset_domain)) {
3164	r = -ENOENT;
3165	amdgpu_put_xgmi_hive(hive);
3166	goto init_failed;
3167	}
3168
3169	/ Drop the early temporary reset domain we created for device /
3170	amdgpu_reset_put_reset_domain(domain: adev->reset_domain);
3171	adev->reset_domain = hive->reset_domain;
3172	amdgpu_put_xgmi_hive(hive);
3173	}
3174	}
3175	}
3176
3177	r = amdgpu_device_init_schedulers(adev);
3178	if (r)
3179	goto init_failed;
3180
3181	if (adev->mman.buffer_funcs_ring->sched.ready)
3182	amdgpu_ttm_set_buffer_funcs_status(adev, enable: true);
3183
3184	/ Don't init kfd if whole hive need to be reset during init /
3185	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
3186	kgd2kfd_init_zone_device(adev);
3187	amdgpu_amdkfd_device_init(adev);
3188	}
3189
3190	amdgpu_fru_get_product_info(adev);
3191
3192	if (!amdgpu_sriov_vf(adev) \|\| amdgpu_sriov_ras_cper_en(adev))
3193	r = amdgpu_cper_init(adev);
3194
3195	init_failed:
3196
3197	return r;
3198	}
3199
3200	/**
3201	* amdgpu_device_fill_reset_magic - writes reset magic to gart pointer
3202	*
3203	* @adev: amdgpu_device pointer
3204	*
3205	* Writes a reset magic value to the gart pointer in VRAM. The driver calls
3206	* this function before a GPU reset. If the value is retained after a
3207	* GPU reset, VRAM has not been lost. Some GPU resets may destroy VRAM contents.
3208	*/
3209	static void amdgpu_device_fill_reset_magic(struct amdgpu_device *adev)
3210	{
3211	memcpy(adev->reset_magic, adev->gart.ptr, AMDGPU_RESET_MAGIC_NUM);
3212	}
3213
3214	/**
3215	* amdgpu_device_check_vram_lost - check if vram is valid
3216	*
3217	* @adev: amdgpu_device pointer
3218	*
3219	* Checks the reset magic value written to the gart pointer in VRAM.
3220	* The driver calls this after a GPU reset to see if the contents of
3221	* VRAM is lost or now.
3222	* returns true if vram is lost, false if not.
3223	*/
3224	static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev)
3225	{
3226	if (memcmp(p: adev->gart.ptr, q: adev->reset_magic,
3227	AMDGPU_RESET_MAGIC_NUM))
3228	return true;
3229
3230	if (!amdgpu_in_reset(adev))
3231	return false;
3232
3233	/*
3234	* For all ASICs with baco/mode1 reset, the VRAM is
3235	* always assumed to be lost.
3236	*/
3237	switch (amdgpu_asic_reset_method(adev)) {
3238	case AMD_RESET_METHOD_LINK:
3239	case AMD_RESET_METHOD_BACO:
3240	case AMD_RESET_METHOD_MODE1:
3241	return true;
3242	default:
3243	return false;
3244	}
3245	}
3246
3247	/**
3248	* amdgpu_device_set_cg_state - set clockgating for amdgpu device
3249	*
3250	* @adev: amdgpu_device pointer
3251	* @state: clockgating state (gate or ungate)
3252	*
3253	* The list of all the hardware IPs that make up the asic is walked and the
3254	* set_clockgating_state callbacks are run.
3255	* Late initialization pass enabling clockgating for hardware IPs.
3256	* Fini or suspend, pass disabling clockgating for hardware IPs.
3257	* Returns 0 on success, negative error code on failure.
3258	*/
3259
3260	int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
3261	enum amd_clockgating_state state)
3262	{
3263	int i, j, r;
3264
3265	if (amdgpu_emu_mode == `1`)
3266	return `0`;
3267
3268	for (j = `0`; j < adev->num_ip_blocks; j++) {
3269	i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - `1`;
3270	if (!adev->ip_blocks[i].status.late_initialized)
3271	continue;
3272	/ skip CG for GFX, SDMA on S0ix /
3273	if (adev->in_s0ix &&
3274	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX \|\|
3275	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3276	continue;
3277	/ skip CG for VCE/UVD, it's handled specially /
3278	if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3279	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3280	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3281	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3282	adev->ip_blocks[i].version->funcs->set_clockgating_state) {
3283	/ enable clockgating to save power /
3284	r = adev->ip_blocks[i].version->funcs->set_clockgating_state(&adev->ip_blocks[i],
3285	state);
3286	if (r) {
3287	DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n",
3288	adev->ip_blocks[i].version->funcs->name, r);
3289	return r;
3290	}
3291	}
3292	}
3293
3294	return `0`;
3295	}
3296
3297	int amdgpu_device_set_pg_state(struct amdgpu_device *adev,
3298	enum amd_powergating_state state)
3299	{
3300	int i, j, r;
3301
3302	if (amdgpu_emu_mode == `1`)
3303	return `0`;
3304
3305	for (j = `0`; j < adev->num_ip_blocks; j++) {
3306	i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - `1`;
3307	if (!adev->ip_blocks[i].status.late_initialized)
3308	continue;
3309	/ skip PG for GFX, SDMA on S0ix /
3310	if (adev->in_s0ix &&
3311	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX \|\|
3312	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
3313	continue;
3314	/ skip CG for VCE/UVD, it's handled specially /
3315	if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
3316	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
3317	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
3318	adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_JPEG &&
3319	adev->ip_blocks[i].version->funcs->set_powergating_state) {
3320	/ enable powergating to save power /
3321	r = adev->ip_blocks[i].version->funcs->set_powergating_state(&adev->ip_blocks[i],
3322	state);
3323	if (r) {
3324	DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
3325	adev->ip_blocks[i].version->funcs->name, r);
3326	return r;
3327	}
3328	}
3329	}
3330	return `0`;
3331	}
3332
3333	static int amdgpu_device_enable_mgpu_fan_boost(void)
3334	{
3335	struct amdgpu_gpu_instance *gpu_ins;
3336	struct amdgpu_device *adev;
3337	int i, ret = `0`;
3338
3339	mutex_lock(&mgpu_info.mutex);
3340
3341	/*
3342	* MGPU fan boost feature should be enabled
3343	* only when there are two or more dGPUs in
3344	* the system
3345	*/
3346	if (mgpu_info.num_dgpu < `2`)
3347	goto out;
3348
3349	for (i = `0`; i < mgpu_info.num_dgpu; i++) {
3350	gpu_ins = &(mgpu_info.gpu_ins[i]);
3351	adev = gpu_ins->adev;
3352	if (!(adev->flags & AMD_IS_APU) &&
3353	!gpu_ins->mgpu_fan_enabled) {
3354	ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
3355	if (ret)
3356	break;
3357
3358	gpu_ins->mgpu_fan_enabled = `1`;
3359	}
3360	}
3361
3362	out:
3363	mutex_unlock(lock: &mgpu_info.mutex);
3364
3365	return ret;
3366	}
3367
3368	/**
3369	* amdgpu_device_ip_late_init - run late init for hardware IPs
3370	*
3371	* @adev: amdgpu_device pointer
3372	*
3373	* Late initialization pass for hardware IPs. The list of all the hardware
3374	* IPs that make up the asic is walked and the late_init callbacks are run.
3375	* late_init covers any special initialization that an IP requires
3376	* after all of the have been initialized or something that needs to happen
3377	* late in the init process.
3378	* Returns 0 on success, negative error code on failure.
3379	*/
3380	static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
3381	{
3382	struct amdgpu_gpu_instance *gpu_instance;
3383	int i = `0`, r;
3384
3385	for (i = `0`; i < adev->num_ip_blocks; i++) {
3386	if (!adev->ip_blocks[i].status.hw)
3387	continue;
3388	if (adev->ip_blocks[i].version->funcs->late_init) {
3389	r = adev->ip_blocks[i].version->funcs->late_init(&adev->ip_blocks[i]);
3390	if (r) {
3391	DRM_ERROR("late_init of IP block <%s> failed %d\n",
3392	adev->ip_blocks[i].version->funcs->name, r);
3393	return r;
3394	}
3395	}
3396	adev->ip_blocks[i].status.late_initialized = true;
3397	}
3398
3399	r = amdgpu_ras_late_init(adev);
3400	if (r) {
3401	DRM_ERROR("amdgpu_ras_late_init failed %d", r);
3402	return r;
3403	}
3404
3405	if (!amdgpu_reset_in_recovery(adev))
3406	amdgpu_ras_set_error_query_ready(adev, ready: true);
3407
3408	amdgpu_device_set_cg_state(adev, state: AMD_CG_STATE_GATE);
3409	amdgpu_device_set_pg_state(adev, state: AMD_PG_STATE_GATE);
3410
3411	amdgpu_device_fill_reset_magic(adev);
3412
3413	r = amdgpu_device_enable_mgpu_fan_boost();
3414	if (r)
3415	DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
3416
3417	/ For passthrough configuration on arcturus and aldebaran, enable special handling SBR /
3418	if (amdgpu_passthrough(adev) &&
3419	((adev->asic_type == CHIP_ARCTURUS && adev->gmc.xgmi.num_physical_nodes > `1`) \|\|
3420	adev->asic_type == CHIP_ALDEBARAN))
3421	amdgpu_dpm_handle_passthrough_sbr(adev, enable: true);
3422
3423	if (adev->gmc.xgmi.num_physical_nodes > `1`) {
3424	mutex_lock(&mgpu_info.mutex);
3425
3426	/*
3427	* Reset device p-state to low as this was booted with high.
3428	*
3429	* This should be performed only after all devices from the same
3430	* hive get initialized.
3431	*
3432	* However, it's unknown how many device in the hive in advance.
3433	* As this is counted one by one during devices initializations.
3434	*
3435	* So, we wait for all XGMI interlinked devices initialized.
3436	* This may bring some delays as those devices may come from
3437	* different hives. But that should be OK.
3438	*/
3439	if (mgpu_info.num_dgpu == adev->gmc.xgmi.num_physical_nodes) {
3440	for (i = `0`; i < mgpu_info.num_gpu; i++) {
3441	gpu_instance = &(mgpu_info.gpu_ins[i]);
3442	if (gpu_instance->adev->flags & AMD_IS_APU)
3443	continue;
3444
3445	r = amdgpu_xgmi_set_pstate(adev: gpu_instance->adev,
3446	pstate: AMDGPU_XGMI_PSTATE_MIN);
3447	if (r) {
3448	DRM_ERROR("pstate setting failed (%d).\n", r);
3449	break;
3450	}
3451	}
3452	}
3453
3454	mutex_unlock(lock: &mgpu_info.mutex);
3455	}
3456
3457	return `0`;
3458	}
3459
3460	static void amdgpu_ip_block_hw_fini(struct amdgpu_ip_block *ip_block)
3461	{
3462	int r;
3463
3464	if (!ip_block->version->funcs->hw_fini) {
3465	DRM_ERROR("hw_fini of IP block <%s> not defined\n",
3466	ip_block->version->funcs->name);
3467	} else {
3468	r = ip_block->version->funcs->hw_fini(ip_block);
3469	/ XXX handle errors /
3470	if (r) {
3471	DRM_DEBUG("hw_fini of IP block <%s> failed %d\n",
3472	ip_block->version->funcs->name, r);
3473	}
3474	}
3475
3476	ip_block->status.hw = false;
3477	}
3478
3479	/**
3480	* amdgpu_device_smu_fini_early - smu hw_fini wrapper
3481	*
3482	* @adev: amdgpu_device pointer
3483	*
3484	* For ASICs need to disable SMC first
3485	*/
3486	static void amdgpu_device_smu_fini_early(struct amdgpu_device *adev)
3487	{
3488	int i;
3489
3490	if (amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) > IP_VERSION(`9`, `0`, `0`))
3491	return;
3492
3493	for (i = `0`; i < adev->num_ip_blocks; i++) {
3494	if (!adev->ip_blocks[i].status.hw)
3495	continue;
3496	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3497	amdgpu_ip_block_hw_fini(ip_block: &adev->ip_blocks[i]);
3498	break;
3499	}
3500	}
3501	}
3502
3503	static int amdgpu_device_ip_fini_early(struct amdgpu_device *adev)
3504	{
3505	int i, r;
3506
3507	for (i = `0`; i < adev->num_ip_blocks; i++) {
3508	if (!adev->ip_blocks[i].version->funcs->early_fini)
3509	continue;
3510
3511	r = adev->ip_blocks[i].version->funcs->early_fini(&adev->ip_blocks[i]);
3512	if (r) {
3513	DRM_DEBUG("early_fini of IP block <%s> failed %d\n",
3514	adev->ip_blocks[i].version->funcs->name, r);
3515	}
3516	}
3517
3518	amdgpu_device_set_pg_state(adev, state: AMD_PG_STATE_UNGATE);
3519	amdgpu_device_set_cg_state(adev, state: AMD_CG_STATE_UNGATE);
3520
3521	amdgpu_amdkfd_suspend(adev, run_pm: false);
3522	amdgpu_userq_suspend(adev);
3523
3524	/ Workaround for ASICs need to disable SMC first /
3525	amdgpu_device_smu_fini_early(adev);
3526
3527	for (i = adev->num_ip_blocks - `1`; i >= `0`; i--) {
3528	if (!adev->ip_blocks[i].status.hw)
3529	continue;
3530
3531	amdgpu_ip_block_hw_fini(ip_block: &adev->ip_blocks[i]);
3532	}
3533
3534	if (amdgpu_sriov_vf(adev)) {
3535	if (amdgpu_virt_release_full_gpu(adev, init: false))
3536	DRM_ERROR("failed to release exclusive mode on fini\n");
3537	}
3538
3539	return `0`;
3540	}
3541
3542	/**
3543	* amdgpu_device_ip_fini - run fini for hardware IPs
3544	*
3545	* @adev: amdgpu_device pointer
3546	*
3547	* Main teardown pass for hardware IPs. The list of all the hardware
3548	* IPs that make up the asic is walked and the hw_fini and sw_fini callbacks
3549	* are run. hw_fini tears down the hardware associated with each IP
3550	* and sw_fini tears down any software state associated with each IP.
3551	* Returns 0 on success, negative error code on failure.
3552	*/
3553	static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
3554	{
3555	int i, r;
3556
3557	amdgpu_cper_fini(adev);
3558
3559	if (amdgpu_sriov_vf(adev) && adev->virt.ras_init_done)
3560	amdgpu_virt_release_ras_err_handler_data(adev);
3561
3562	if (adev->gmc.xgmi.num_physical_nodes > `1`)
3563	amdgpu_xgmi_remove_device(adev);
3564
3565	amdgpu_amdkfd_device_fini_sw(adev);
3566
3567	for (i = adev->num_ip_blocks - `1`; i >= `0`; i--) {
3568	if (!adev->ip_blocks[i].status.sw)
3569	continue;
3570
3571	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) {
3572	amdgpu_ucode_free_bo(adev);
3573	amdgpu_free_static_csa(bo: &adev->virt.csa_obj);
3574	amdgpu_device_wb_fini(adev);
3575	amdgpu_device_mem_scratch_fini(adev);
3576	amdgpu_ib_pool_fini(adev);
3577	amdgpu_seq64_fini(adev);
3578	amdgpu_doorbell_fini(adev);
3579	}
3580	if (adev->ip_blocks[i].version->funcs->sw_fini) {
3581	r = adev->ip_blocks[i].version->funcs->sw_fini(&adev->ip_blocks[i]);
3582	/ XXX handle errors /
3583	if (r) {
3584	DRM_DEBUG("sw_fini of IP block <%s> failed %d\n",
3585	adev->ip_blocks[i].version->funcs->name, r);
3586	}
3587	}
3588	adev->ip_blocks[i].status.sw = false;
3589	adev->ip_blocks[i].status.valid = false;
3590	}
3591
3592	for (i = adev->num_ip_blocks - `1`; i >= `0`; i--) {
3593	if (!adev->ip_blocks[i].status.late_initialized)
3594	continue;
3595	if (adev->ip_blocks[i].version->funcs->late_fini)
3596	adev->ip_blocks[i].version->funcs->late_fini(&adev->ip_blocks[i]);
3597	adev->ip_blocks[i].status.late_initialized = false;
3598	}
3599
3600	amdgpu_ras_fini(adev);
3601
3602	return `0`;
3603	}
3604
3605	/**
3606	* amdgpu_device_delayed_init_work_handler - work handler for IB tests
3607	*
3608	* @work: work_struct.
3609	*/
3610	static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
3611	{
3612	struct amdgpu_device *adev =
3613	container_of(work, struct amdgpu_device, delayed_init_work.work);
3614	int r;
3615
3616	r = amdgpu_ib_ring_tests(adev);
3617	if (r)
3618	DRM_ERROR("ib ring test failed (%d).\n", r);
3619	}
3620
3621	static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
3622	{
3623	struct amdgpu_device *adev =
3624	container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work);
3625
3626	WARN_ON_ONCE(adev->gfx.gfx_off_state);
3627	WARN_ON_ONCE(adev->gfx.gfx_off_req_count);
3628
3629	if (!amdgpu_dpm_set_powergating_by_smu(adev, block_type: AMD_IP_BLOCK_TYPE_GFX, gate: true, inst: `0`))
3630	adev->gfx.gfx_off_state = true;
3631	}
3632
3633	/**
3634	* amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
3635	*
3636	* @adev: amdgpu_device pointer
3637	*
3638	* Main suspend function for hardware IPs. The list of all the hardware
3639	* IPs that make up the asic is walked, clockgating is disabled and the
3640	* suspend callbacks are run. suspend puts the hardware and software state
3641	* in each IP into a state suitable for suspend.
3642	* Returns 0 on success, negative error code on failure.
3643	*/
3644	static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
3645	{
3646	int i, r;
3647
3648	amdgpu_device_set_pg_state(adev, state: AMD_PG_STATE_UNGATE);
3649	amdgpu_device_set_cg_state(adev, state: AMD_CG_STATE_UNGATE);
3650
3651	/*
3652	* Per PMFW team's suggestion, driver needs to handle gfxoff
3653	* and df cstate features disablement for gpu reset(e.g. Mode1Reset)
3654	* scenario. Add the missing df cstate disablement here.
3655	*/
3656	if (amdgpu_dpm_set_df_cstate(adev, cstate: DF_CSTATE_DISALLOW))
3657	dev_warn(adev->dev, "Failed to disallow df cstate");
3658
3659	for (i = adev->num_ip_blocks - `1`; i >= `0`; i--) {
3660	if (!adev->ip_blocks[i].status.valid)
3661	continue;
3662
3663	/ displays are handled separately /
3664	if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_DCE)
3665	continue;
3666
3667	/ XXX handle errors /
3668	r = amdgpu_ip_block_suspend(ip_block: &adev->ip_blocks[i]);
3669	if (r)
3670	return r;
3671	}
3672
3673	return `0`;
3674	}
3675
3676	/**
3677	* amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
3678	*
3679	* @adev: amdgpu_device pointer
3680	*
3681	* Main suspend function for hardware IPs. The list of all the hardware
3682	* IPs that make up the asic is walked, clockgating is disabled and the
3683	* suspend callbacks are run. suspend puts the hardware and software state
3684	* in each IP into a state suitable for suspend.
3685	* Returns 0 on success, negative error code on failure.
3686	*/
3687	static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
3688	{
3689	int i, r;
3690
3691	if (adev->in_s0ix)
3692	amdgpu_dpm_gfx_state_change(adev, state: sGpuChangeState_D3Entry);
3693
3694	for (i = adev->num_ip_blocks - `1`; i >= `0`; i--) {
3695	if (!adev->ip_blocks[i].status.valid)
3696	continue;
3697	/ displays are handled in phase1 /
3698	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
3699	continue;
3700	/ PSP lost connection when err_event_athub occurs /
3701	if (amdgpu_ras_intr_triggered() &&
3702	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
3703	adev->ip_blocks[i].status.hw = false;
3704	continue;
3705	}
3706
3707	/ skip unnecessary suspend if we do not initialize them yet /
3708	if (!amdgpu_ip_member_of_hwini(
3709	adev, block: adev->ip_blocks[i].version->type))
3710	continue;
3711
3712	/ Since we skip suspend for S0i3, we need to cancel the delayed*
3713	* idle work here as the suspend callback never gets called.
3714	*/
3715	if (adev->in_s0ix &&
3716	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX &&
3717	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) >= IP_VERSION(`10`, `0`, `0`))
3718	cancel_delayed_work_sync(dwork: &adev->gfx.idle_work);
3719	/ skip suspend of gfx/mes and psp for S0ix*
3720	* gfx is in gfxoff state, so on resume it will exit gfxoff just
3721	* like at runtime. PSP is also part of the always on hardware
3722	* so no need to suspend it.
3723	*/
3724	if (adev->in_s0ix &&
3725	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP \|\|
3726	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX \|\|
3727	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
3728	continue;
3729
3730	/ SDMA 5.x+ is part of GFX power domain so it's covered by GFXOFF /
3731	if (adev->in_s0ix &&
3732	(amdgpu_ip_version(adev, ip: SDMA0_HWIP, inst: `0`) >=
3733	IP_VERSION(`5`, `0`, `0`)) &&
3734	(adev->ip_blocks[i].version->type ==
3735	AMD_IP_BLOCK_TYPE_SDMA))
3736	continue;
3737
3738	/ Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.*
3739	* These are in TMR, hence are expected to be reused by PSP-TOS to reload
3740	* from this location and RLC Autoload automatically also gets loaded
3741	* from here based on PMFW -> PSP message during re-init sequence.
3742	* Therefore, the psp suspend & resume should be skipped to avoid destroy
3743	* the TMR and reload FWs again for IMU enabled APU ASICs.
3744	*/
3745	if (amdgpu_in_reset(adev) &&
3746	(adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
3747	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3748	continue;
3749
3750	/ XXX handle errors /
3751	r = amdgpu_ip_block_suspend(ip_block: &adev->ip_blocks[i]);
3752	adev->ip_blocks[i].status.hw = false;
3753
3754	/ handle putting the SMC in the appropriate state /
3755	if (!amdgpu_sriov_vf(adev)) {
3756	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
3757	r = amdgpu_dpm_set_mp1_state(adev, mp1_state: adev->mp1_state);
3758	if (r) {
3759	DRM_ERROR("SMC failed to set mp1 state %d, %d\n",
3760	adev->mp1_state, r);
3761	return r;
3762	}
3763	}
3764	}
3765	}
3766
3767	return `0`;
3768	}
3769
3770	/**
3771	* amdgpu_device_ip_suspend - run suspend for hardware IPs
3772	*
3773	* @adev: amdgpu_device pointer
3774	*
3775	* Main suspend function for hardware IPs. The list of all the hardware
3776	* IPs that make up the asic is walked, clockgating is disabled and the
3777	* suspend callbacks are run. suspend puts the hardware and software state
3778	* in each IP into a state suitable for suspend.
3779	* Returns 0 on success, negative error code on failure.
3780	*/
3781	int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
3782	{
3783	int r;
3784
3785	if (amdgpu_sriov_vf(adev)) {
3786	amdgpu_virt_fini_data_exchange(adev);
3787	amdgpu_virt_request_full_gpu(adev, init: false);
3788	}
3789
3790	amdgpu_ttm_set_buffer_funcs_status(adev, enable: false);
3791
3792	r = amdgpu_device_ip_suspend_phase1(adev);
3793	if (r)
3794	return r;
3795	r = amdgpu_device_ip_suspend_phase2(adev);
3796
3797	if (amdgpu_sriov_vf(adev))
3798	amdgpu_virt_release_full_gpu(adev, init: false);
3799
3800	return r;
3801	}
3802
3803	static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
3804	{
3805	int i, r;
3806
3807	static enum amd_ip_block_type ip_order[] = {
3808	AMD_IP_BLOCK_TYPE_COMMON,
3809	AMD_IP_BLOCK_TYPE_GMC,
3810	AMD_IP_BLOCK_TYPE_PSP,
3811	AMD_IP_BLOCK_TYPE_IH,
3812	};
3813
3814	for (i = `0`; i < adev->num_ip_blocks; i++) {
3815	int j;
3816	struct amdgpu_ip_block *block;
3817
3818	block = &adev->ip_blocks[i];
3819	block->status.hw = false;
3820
3821	for (j = `0`; j < ARRAY_SIZE(ip_order); j++) {
3822
3823	if (block->version->type != ip_order[j] \|\|
3824	!block->status.valid)
3825	continue;
3826
3827	r = block->version->funcs->hw_init(&adev->ip_blocks[i]);
3828	if (r) {
3829	dev_err(adev->dev, "RE-INIT-early: %s failed\n",
3830	block->version->funcs->name);
3831	return r;
3832	}
3833	block->status.hw = true;
3834	}
3835	}
3836
3837	return `0`;
3838	}
3839
3840	static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
3841	{
3842	struct amdgpu_ip_block *block;
3843	int i, r = `0`;
3844
3845	static enum amd_ip_block_type ip_order[] = {
3846	AMD_IP_BLOCK_TYPE_SMC,
3847	AMD_IP_BLOCK_TYPE_DCE,
3848	AMD_IP_BLOCK_TYPE_GFX,
3849	AMD_IP_BLOCK_TYPE_SDMA,
3850	AMD_IP_BLOCK_TYPE_MES,
3851	AMD_IP_BLOCK_TYPE_UVD,
3852	AMD_IP_BLOCK_TYPE_VCE,
3853	AMD_IP_BLOCK_TYPE_VCN,
3854	AMD_IP_BLOCK_TYPE_JPEG
3855	};
3856
3857	for (i = `0`; i < ARRAY_SIZE(ip_order); i++) {
3858	block = amdgpu_device_ip_get_ip_block(adev, type: ip_order[i]);
3859
3860	if (!block)
3861	continue;
3862
3863	if (block->status.valid && !block->status.hw) {
3864	if (block->version->type == AMD_IP_BLOCK_TYPE_SMC) {
3865	r = amdgpu_ip_block_resume(ip_block: block);
3866	} else {
3867	r = block->version->funcs->hw_init(block);
3868	}
3869
3870	if (r) {
3871	dev_err(adev->dev, "RE-INIT-late: %s failed\n",
3872	block->version->funcs->name);
3873	break;
3874	}
3875	block->status.hw = true;
3876	}
3877	}
3878
3879	return r;
3880	}
3881
3882	/**
3883	* amdgpu_device_ip_resume_phase1 - run resume for hardware IPs
3884	*
3885	* @adev: amdgpu_device pointer
3886	*
3887	* First resume function for hardware IPs. The list of all the hardware
3888	* IPs that make up the asic is walked and the resume callbacks are run for
3889	* COMMON, GMC, and IH. resume puts the hardware into a functional state
3890	* after a suspend and updates the software state as necessary. This
3891	* function is also used for restoring the GPU after a GPU reset.
3892	* Returns 0 on success, negative error code on failure.
3893	*/
3894	static int amdgpu_device_ip_resume_phase1(struct amdgpu_device *adev)
3895	{
3896	int i, r;
3897
3898	for (i = `0`; i < adev->num_ip_blocks; i++) {
3899	if (!adev->ip_blocks[i].status.valid \|\| adev->ip_blocks[i].status.hw)
3900	continue;
3901	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON \|\|
3902	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC \|\|
3903	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH \|\|
3904	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP && amdgpu_sriov_vf(adev))) {
3905
3906	r = amdgpu_ip_block_resume(ip_block: &adev->ip_blocks[i]);
3907	if (r)
3908	return r;
3909	}
3910	}
3911
3912	return `0`;
3913	}
3914
3915	/**
3916	* amdgpu_device_ip_resume_phase2 - run resume for hardware IPs
3917	*
3918	* @adev: amdgpu_device pointer
3919	*
3920	* Second resume function for hardware IPs. The list of all the hardware
3921	* IPs that make up the asic is walked and the resume callbacks are run for
3922	* all blocks except COMMON, GMC, and IH. resume puts the hardware into a
3923	* functional state after a suspend and updates the software state as
3924	* necessary. This function is also used for restoring the GPU after a GPU
3925	* reset.
3926	* Returns 0 on success, negative error code on failure.
3927	*/
3928	static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
3929	{
3930	int i, r;
3931
3932	for (i = `0`; i < adev->num_ip_blocks; i++) {
3933	if (!adev->ip_blocks[i].status.valid \|\| adev->ip_blocks[i].status.hw)
3934	continue;
3935	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON \|\|
3936	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC \|\|
3937	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH \|\|
3938	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE \|\|
3939	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
3940	continue;
3941	r = amdgpu_ip_block_resume(ip_block: &adev->ip_blocks[i]);
3942	if (r)
3943	return r;
3944	}
3945
3946	return `0`;
3947	}
3948
3949	/**
3950	* amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
3951	*
3952	* @adev: amdgpu_device pointer
3953	*
3954	* Third resume function for hardware IPs. The list of all the hardware
3955	* IPs that make up the asic is walked and the resume callbacks are run for
3956	* all DCE. resume puts the hardware into a functional state after a suspend
3957	* and updates the software state as necessary. This function is also used
3958	* for restoring the GPU after a GPU reset.
3959	*
3960	* Returns 0 on success, negative error code on failure.
3961	*/
3962	static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
3963	{
3964	int i, r;
3965
3966	for (i = `0`; i < adev->num_ip_blocks; i++) {
3967	if (!adev->ip_blocks[i].status.valid \|\| adev->ip_blocks[i].status.hw)
3968	continue;
3969	if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
3970	r = amdgpu_ip_block_resume(ip_block: &adev->ip_blocks[i]);
3971	if (r)
3972	return r;
3973	}
3974	}
3975
3976	return `0`;
3977	}
3978
3979	/**
3980	* amdgpu_device_ip_resume - run resume for hardware IPs
3981	*
3982	* @adev: amdgpu_device pointer
3983	*
3984	* Main resume function for hardware IPs. The hardware IPs
3985	* are split into two resume functions because they are
3986	* also used in recovering from a GPU reset and some additional
3987	* steps need to be take between them. In this case (S3/S4) they are
3988	* run sequentially.
3989	* Returns 0 on success, negative error code on failure.
3990	*/
3991	static int amdgpu_device_ip_resume(struct amdgpu_device *adev)
3992	{
3993	int r;
3994
3995	r = amdgpu_device_ip_resume_phase1(adev);
3996	if (r)
3997	return r;
3998
3999	r = amdgpu_device_fw_loading(adev);
4000	if (r)
4001	return r;
4002
4003	r = amdgpu_device_ip_resume_phase2(adev);
4004
4005	if (adev->mman.buffer_funcs_ring->sched.ready)
4006	amdgpu_ttm_set_buffer_funcs_status(adev, enable: true);
4007
4008	if (r)
4009	return r;
4010
4011	amdgpu_fence_driver_hw_init(adev);
4012
4013	r = amdgpu_device_ip_resume_phase3(adev);
4014
4015	return r;
4016	}
4017
4018	/**
4019	* amdgpu_device_detect_sriov_bios - determine if the board supports SR-IOV
4020	*
4021	* @adev: amdgpu_device pointer
4022	*
4023	* Query the VBIOS data tables to determine if the board supports SR-IOV.
4024	*/
4025	static void amdgpu_device_detect_sriov_bios(struct amdgpu_device *adev)
4026	{
4027	if (amdgpu_sriov_vf(adev)) {
4028	if (adev->is_atom_fw) {
4029	if (amdgpu_atomfirmware_gpu_virtualization_supported(adev))
4030	adev->virt.caps \|= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
4031	} else {
4032	if (amdgpu_atombios_has_gpu_virtualization_table(adev))
4033	adev->virt.caps \|= AMDGPU_SRIOV_CAPS_SRIOV_VBIOS;
4034	}
4035
4036	if (!(adev->virt.caps & AMDGPU_SRIOV_CAPS_SRIOV_VBIOS))
4037	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_NO_VBIOS, error_flags: `0`, error_data: `0`);
4038	}
4039	}
4040
4041	/**
4042	* amdgpu_device_asic_has_dc_support - determine if DC supports the asic
4043	*
4044	* @asic_type: AMD asic type
4045	*
4046	* Check if there is DC (new modesetting infrastructre) support for an asic.
4047	* returns true if DC has support, false if not.
4048	*/
4049	bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
4050	{
4051	switch (asic_type) {
4052	#ifdef CONFIG_DRM_AMDGPU_SI
4053	case CHIP_HAINAN:
4054	#endif
4055	case CHIP_TOPAZ:
4056	/ chips with no display hardware /
4057	return false;
4058	#if defined(CONFIG_DRM_AMD_DC)
4059	case CHIP_TAHITI:
4060	case CHIP_PITCAIRN:
4061	case CHIP_VERDE:
4062	case CHIP_OLAND:
4063	/*
4064	* We have systems in the wild with these ASICs that require
4065	* LVDS and VGA support which is not supported with DC.
4066	*
4067	* Fallback to the non-DC driver here by default so as not to
4068	* cause regressions.
4069	*/
4070	#if defined(CONFIG_DRM_AMD_DC_SI)
4071	return amdgpu_dc > `0`;
4072	#else
4073	return false;
4074	#endif
4075	case CHIP_BONAIRE:
4076	case CHIP_KAVERI:
4077	case CHIP_KABINI:
4078	case CHIP_MULLINS:
4079	/*
4080	* We have systems in the wild with these ASICs that require
4081	* VGA support which is not supported with DC.
4082	*
4083	* Fallback to the non-DC driver here by default so as not to
4084	* cause regressions.
4085	*/
4086	return amdgpu_dc > `0`;
4087	default:
4088	return amdgpu_dc != `0`;
4089	#else
4090	default:
4091	if (amdgpu_dc > `0`)
4092	DRM_INFO_ONCE("Display Core has been requested via kernel parameter but isn't supported by ASIC, ignoring\n");
4093	return false;
4094	#endif
4095	}
4096	}
4097
4098	/**
4099	* amdgpu_device_has_dc_support - check if dc is supported
4100	*
4101	* @adev: amdgpu_device pointer
4102	*
4103	* Returns true for supported, false for not supported
4104	*/
4105	bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
4106	{
4107	if (adev->enable_virtual_display \|\|
4108	(adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
4109	return false;
4110
4111	return amdgpu_device_asic_has_dc_support(asic_type: adev->asic_type);
4112	}
4113
4114	static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
4115	{
4116	struct amdgpu_device *adev =
4117	container_of(__work, struct amdgpu_device, xgmi_reset_work);
4118	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
4119
4120	/ It's a bug to not have a hive within this function /
4121	if (WARN_ON(!hive))
4122	return;
4123
4124	/*
4125	* Use task barrier to synchronize all xgmi reset works across the
4126	* hive. task_barrier_enter and task_barrier_exit will block
4127	* until all the threads running the xgmi reset works reach
4128	* those points. task_barrier_full will do both blocks.
4129	*/
4130	if (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO) {
4131
4132	task_barrier_enter(tb: &hive->tb);
4133	adev->asic_reset_res = amdgpu_device_baco_enter(dev: adev_to_drm(adev));
4134
4135	if (adev->asic_reset_res)
4136	goto fail;
4137
4138	task_barrier_exit(tb: &hive->tb);
4139	adev->asic_reset_res = amdgpu_device_baco_exit(dev: adev_to_drm(adev));
4140
4141	if (adev->asic_reset_res)
4142	goto fail;
4143
4144	amdgpu_ras_reset_error_count(adev, block: AMDGPU_RAS_BLOCK__MMHUB);
4145	} else {
4146
4147	task_barrier_full(tb: &hive->tb);
4148	adev->asic_reset_res = amdgpu_asic_reset(adev);
4149	}
4150
4151	fail:
4152	if (adev->asic_reset_res)
4153	DRM_WARN("ASIC reset failed with error, %d for drm dev, %s",
4154	adev->asic_reset_res, adev_to_drm(adev)->unique);
4155	amdgpu_put_xgmi_hive(hive);
4156	}
4157
4158	static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
4159	{
4160	char *input = amdgpu_lockup_timeout;
4161	char *timeout_setting = NULL;
4162	int index = `0`;
4163	long timeout;
4164	int ret = `0`;
4165
4166	/*
4167	* By default timeout for non compute jobs is 10000
4168	* and 60000 for compute jobs.
4169	* In SR-IOV or passthrough mode, timeout for compute
4170	* jobs are 60000 by default.
4171	*/
4172	adev->gfx_timeout = msecs_to_jiffies(m: `10000`);
4173	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
4174	if (amdgpu_sriov_vf(adev))
4175	adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
4176	msecs_to_jiffies(m: `60000`) : msecs_to_jiffies(m: `10000`);
4177	else
4178	adev->compute_timeout = msecs_to_jiffies(m: `60000`);
4179
4180	if (strnlen(p: input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
4181	while ((timeout_setting = strsep(&input, ",")) &&
4182	strnlen(p: timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
4183	ret = kstrtol(s: timeout_setting, base: `0`, res: &timeout);
4184	if (ret)
4185	return ret;
4186
4187	if (timeout == `0`) {
4188	index++;
4189	continue;
4190	} else if (timeout < `0`) {
4191	timeout = MAX_SCHEDULE_TIMEOUT;
4192	dev_warn(adev->dev, "lockup timeout disabled");
4193	add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
4194	} else {
4195	timeout = msecs_to_jiffies(m: timeout);
4196	}
4197
4198	switch (index++) {
4199	case `0`:
4200	adev->gfx_timeout = timeout;
4201	break;
4202	case `1`:
4203	adev->compute_timeout = timeout;
4204	break;
4205	case `2`:
4206	adev->sdma_timeout = timeout;
4207	break;
4208	case `3`:
4209	adev->video_timeout = timeout;
4210	break;
4211	default:
4212	break;
4213	}
4214	}
4215	/*
4216	* There is only one value specified and
4217	* it should apply to all non-compute jobs.
4218	*/
4219	if (index == `1`) {
4220	adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
4221	if (amdgpu_sriov_vf(adev) \|\| amdgpu_passthrough(adev))
4222	adev->compute_timeout = adev->gfx_timeout;
4223	}
4224	}
4225
4226	return ret;
4227	}
4228
4229	/**
4230	* amdgpu_device_check_iommu_direct_map - check if RAM direct mapped to GPU
4231	*
4232	* @adev: amdgpu_device pointer
4233	*
4234	* RAM direct mapped to GPU if IOMMU is not enabled or is pass through mode
4235	*/
4236	static void amdgpu_device_check_iommu_direct_map(struct amdgpu_device *adev)
4237	{
4238	struct iommu_domain *domain;
4239
4240	domain = iommu_get_domain_for_dev(dev: adev->dev);
4241	if (!domain \|\| domain->type == IOMMU_DOMAIN_IDENTITY)
4242	adev->ram_is_direct_mapped = true;
4243	}
4244
#if defined(CONFIG_HSA_AMD_P2P)
/**
 * amdgpu_device_check_iommu_remap - Check if DMA remapping is enabled.
 *
 * @adev: amdgpu_device pointer
 *
 * Returns true when the device has an IOMMU domain of type
 * IOMMU_DOMAIN_DMA or IOMMU_DOMAIN_DMA_FQ, false otherwise.
 */
static bool amdgpu_device_check_iommu_remap(struct amdgpu_device *adev)
{
	struct iommu_domain *dom = iommu_get_domain_for_dev(adev->dev);

	if (!dom)
		return false;

	return dom->type == IOMMU_DOMAIN_DMA ||
	       dom->type == IOMMU_DOMAIN_DMA_FQ;
}
#endif
4265
4266	static void amdgpu_device_set_mcbp(struct amdgpu_device *adev)
4267	{
4268	if (amdgpu_mcbp == `1`)
4269	adev->gfx.mcbp = true;
4270	else if (amdgpu_mcbp == `0`)
4271	adev->gfx.mcbp = false;
4272
4273	if (amdgpu_sriov_vf(adev))
4274	adev->gfx.mcbp = true;
4275
4276	if (adev->gfx.mcbp)
4277	DRM_INFO("MCBP is enabled\n");
4278	}
4279
4280	/**
4281	* amdgpu_device_init - initialize the driver
4282	*
4283	* @adev: amdgpu_device pointer
4284	* @flags: driver flags
4285	*
4286	* Initializes the driver info and hw (all asics).
4287	* Returns 0 for success or an error on failure.
4288	* Called at driver startup.
4289	*/
4290	int amdgpu_device_init(struct amdgpu_device *adev,
4291	uint32_t flags)
4292	{
4293	struct drm_device *ddev = adev_to_drm(adev);
4294	struct pci_dev *pdev = adev->pdev;
4295	int r, i;
4296	bool px = false;
4297	u32 max_MBps;
4298	int tmp;
4299
4300	adev->shutdown = false;
4301	adev->flags = flags;
4302
4303	if (amdgpu_force_asic_type >= `0` && amdgpu_force_asic_type < CHIP_LAST)
4304	adev->asic_type = amdgpu_force_asic_type;
4305	else
4306	adev->asic_type = flags & AMD_ASIC_MASK;
4307
4308	adev->usec_timeout = AMDGPU_MAX_USEC_TIMEOUT;
4309	if (amdgpu_emu_mode == `1`)
4310	adev->usec_timeout *= `10`;
4311	adev->gmc.gart_size = `512` * `1024` * `1024`;
4312	adev->accel_working = false;
4313	adev->num_rings = `0`;
4314	RCU_INIT_POINTER(adev->gang_submit, dma_fence_get_stub());
4315	adev->mman.buffer_funcs = NULL;
4316	adev->mman.buffer_funcs_ring = NULL;
4317	adev->vm_manager.vm_pte_funcs = NULL;
4318	adev->vm_manager.vm_pte_num_scheds = `0`;
4319	adev->gmc.gmc_funcs = NULL;
4320	adev->harvest_ip_mask = `0x0`;
4321	adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS);
4322	bitmap_zero(dst: adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
4323
4324	adev->smc_rreg = &amdgpu_invalid_rreg;
4325	adev->smc_wreg = &amdgpu_invalid_wreg;
4326	adev->pcie_rreg = &amdgpu_invalid_rreg;
4327	adev->pcie_wreg = &amdgpu_invalid_wreg;
4328	adev->pcie_rreg_ext = &amdgpu_invalid_rreg_ext;
4329	adev->pcie_wreg_ext = &amdgpu_invalid_wreg_ext;
4330	adev->pciep_rreg = &amdgpu_invalid_rreg;
4331	adev->pciep_wreg = &amdgpu_invalid_wreg;
4332	adev->pcie_rreg64 = &amdgpu_invalid_rreg64;
4333	adev->pcie_wreg64 = &amdgpu_invalid_wreg64;
4334	adev->pcie_rreg64_ext = &amdgpu_invalid_rreg64_ext;
4335	adev->pcie_wreg64_ext = &amdgpu_invalid_wreg64_ext;
4336	adev->uvd_ctx_rreg = &amdgpu_invalid_rreg;
4337	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
4338	adev->didt_rreg = &amdgpu_invalid_rreg;
4339	adev->didt_wreg = &amdgpu_invalid_wreg;
4340	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
4341	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
4342	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
4343	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;
4344
4345	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
4346	amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
4347	pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
4348
4349	/ mutex initialization are all done here so we*
4350	* can recall function without having locking issues
4351	*/
4352	mutex_init(&adev->firmware.mutex);
4353	mutex_init(&adev->pm.mutex);
4354	mutex_init(&adev->gfx.gpu_clock_mutex);
4355	mutex_init(&adev->srbm_mutex);
4356	mutex_init(&adev->gfx.pipe_reserve_mutex);
4357	mutex_init(&adev->gfx.gfx_off_mutex);
4358	mutex_init(&adev->gfx.partition_mutex);
4359	mutex_init(&adev->grbm_idx_mutex);
4360	mutex_init(&adev->mn_lock);
4361	mutex_init(&adev->virt.vf_errors.lock);
4362	hash_init(adev->mn_hash);
4363	mutex_init(&adev->psp.mutex);
4364	mutex_init(&adev->notifier_lock);
4365	mutex_init(&adev->pm.stable_pstate_ctx_lock);
4366	mutex_init(&adev->benchmark_mutex);
4367	mutex_init(&adev->gfx.reset_sem_mutex);
4368	/ Initialize the mutex for cleaner shader isolation between GFX and compute processes /
4369	mutex_init(&adev->enforce_isolation_mutex);
4370	for (i = `0`; i < MAX_XCP; ++i) {
4371	adev->isolation[i].spearhead = dma_fence_get_stub();
4372	amdgpu_sync_create(sync: &adev->isolation[i].active);
4373	amdgpu_sync_create(sync: &adev->isolation[i].prev);
4374	}
4375	mutex_init(&adev->gfx.userq_sch_mutex);
4376	mutex_init(&adev->gfx.workload_profile_mutex);
4377	mutex_init(&adev->vcn.workload_profile_mutex);
4378	mutex_init(&adev->userq_mutex);
4379
4380	amdgpu_device_init_apu_flags(adev);
4381
4382	r = amdgpu_device_check_arguments(adev);
4383	if (r)
4384	return r;
4385
4386	spin_lock_init(&adev->mmio_idx_lock);
4387	spin_lock_init(&adev->smc_idx_lock);
4388	spin_lock_init(&adev->pcie_idx_lock);
4389	spin_lock_init(&adev->uvd_ctx_idx_lock);
4390	spin_lock_init(&adev->didt_idx_lock);
4391	spin_lock_init(&adev->gc_cac_idx_lock);
4392	spin_lock_init(&adev->se_cac_idx_lock);
4393	spin_lock_init(&adev->audio_endpt_idx_lock);
4394	spin_lock_init(&adev->mm_stats.lock);
4395	spin_lock_init(&adev->virt.rlcg_reg_lock);
4396	spin_lock_init(&adev->wb.lock);
4397
4398	xa_init_flags(xa: &adev->userq_xa, XA_FLAGS_LOCK_IRQ);
4399
4400	INIT_LIST_HEAD(list: &adev->reset_list);
4401
4402	INIT_LIST_HEAD(list: &adev->ras_list);
4403
4404	INIT_LIST_HEAD(list: &adev->pm.od_kobj_list);
4405
4406	INIT_LIST_HEAD(list: &adev->userq_mgr_list);
4407
4408	INIT_DELAYED_WORK(&adev->delayed_init_work,
4409	amdgpu_device_delayed_init_work_handler);
4410	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
4411	amdgpu_device_delay_enable_gfx_off);
4412	/*
4413	* Initialize the enforce_isolation work structures for each XCP
4414	* partition. This work handler is responsible for enforcing shader
4415	* isolation on AMD GPUs. It counts the number of emitted fences for
4416	* each GFX and compute ring. If there are any fences, it schedules
4417	* the `enforce_isolation_work` to be run after a delay. If there are
4418	* no fences, it signals the Kernel Fusion Driver (KFD) to resume the
4419	* runqueue.
4420	*/
4421	for (i = `0`; i < MAX_XCP; i++) {
4422	INIT_DELAYED_WORK(&adev->gfx.enforce_isolation[i].work,
4423	amdgpu_gfx_enforce_isolation_handler);
4424	adev->gfx.enforce_isolation[i].adev = adev;
4425	adev->gfx.enforce_isolation[i].xcp_id = i;
4426	}
4427
4428	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
4429
4430	adev->gfx.gfx_off_req_count = `1`;
4431	adev->gfx.gfx_off_residency = `0`;
4432	adev->gfx.gfx_off_entrycount = `0`;
4433	adev->pm.ac_power = power_supply_is_system_supplied() > `0`;
4434
4435	atomic_set(v: &adev->throttling_logging_enabled, i: `1`);
4436	/*
4437	* If throttling continues, logging will be performed every minute
4438	* to avoid log flooding. "-1" is subtracted since the thermal
4439	* throttling interrupt comes every second. Thus, the total logging
4440	* interval is 59 seconds(retelimited printk interval) + 1(waiting
4441	* for throttling interrupt) = 60 seconds.
4442	*/
4443	ratelimit_state_init(rs: &adev->throttling_logging_rs, interval: (`60` - `1`) * HZ, burst: `1`);
4444
4445	ratelimit_set_flags(rs: &adev->throttling_logging_rs, RATELIMIT_MSG_ON_RELEASE);
4446
4447	/ Registers mapping /
4448	/ TODO: block userspace mapping of io register /
4449	if (adev->asic_type >= CHIP_BONAIRE) {
4450	adev->rmmio_base = pci_resource_start(adev->pdev, `5`);
4451	adev->rmmio_size = pci_resource_len(adev->pdev, `5`);
4452	} else {
4453	adev->rmmio_base = pci_resource_start(adev->pdev, `2`);
4454	adev->rmmio_size = pci_resource_len(adev->pdev, `2`);
4455	}
4456
4457	for (i = `0`; i < AMD_IP_BLOCK_TYPE_NUM; i++)
4458	atomic_set(v: &adev->pm.pwr_state[i], i: POWER_STATE_UNKNOWN);
4459
4460	adev->rmmio = ioremap(offset: adev->rmmio_base, size: adev->rmmio_size);
4461	if (!adev->rmmio)
4462	return -ENOMEM;
4463
4464	DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base);
4465	DRM_INFO("register mmio size: %u\n", (unsigned int)adev->rmmio_size);
4466
4467	/*
4468	* Reset domain needs to be present early, before XGMI hive discovered
4469	* (if any) and initialized to use reset sem and in_gpu reset flag
4470	* early on during init and before calling to RREG32.
4471	*/
4472	adev->reset_domain = amdgpu_reset_create_reset_domain(type: SINGLE_DEVICE, wq_name: "amdgpu-reset-dev");
4473	if (!adev->reset_domain)
4474	return -ENOMEM;
4475
4476	/ detect hw virtualization here /
4477	amdgpu_virt_init(adev);
4478
4479	amdgpu_device_get_pcie_info(adev);
4480
4481	r = amdgpu_device_get_job_timeout_settings(adev);
4482	if (r) {
4483	dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
4484	return r;
4485	}
4486
4487	amdgpu_device_set_mcbp(adev);
4488
4489	/*
4490	* By default, use default mode where all blocks are expected to be
4491	* initialized. At present a 'swinit' of blocks is required to be
4492	* completed before the need for a different level is detected.
4493	*/
4494	amdgpu_set_init_level(adev, lvl: AMDGPU_INIT_LEVEL_DEFAULT);
4495	/ early init functions /
4496	r = amdgpu_device_ip_early_init(adev);
4497	if (r)
4498	return r;
4499
4500	/*
4501	* No need to remove conflicting FBs for non-display class devices.
4502	* This prevents the sysfb from being freed accidently.
4503	*/
4504	if ((pdev->class >> `8`) == PCI_CLASS_DISPLAY_VGA \|\|
4505	(pdev->class >> `8`) == PCI_CLASS_DISPLAY_OTHER) {
4506	/ Get rid of things like offb /
4507	r = aperture_remove_conflicting_pci_devices(pdev: adev->pdev, name: amdgpu_kms_driver.name);
4508	if (r)
4509	return r;
4510	}
4511
4512	/ Enable TMZ based on IP_VERSION /
4513	amdgpu_gmc_tmz_set(adev);
4514
4515	if (amdgpu_sriov_vf(adev) &&
4516	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) >= IP_VERSION(`10`, `3`, `0`))
4517	/ VF MMIO access (except mailbox range) from CPU*
4518	* will be blocked during sriov runtime
4519	*/
4520	adev->virt.caps \|= AMDGPU_VF_MMIO_ACCESS_PROTECT;
4521
4522	amdgpu_gmc_noretry_set(adev);
4523	/ Need to get xgmi info early to decide the reset behavior/
4524	if (adev->gmc.xgmi.supported) {
4525	r = adev->gfxhub.funcs->get_xgmi_info(adev);
4526	if (r)
4527	return r;
4528	}
4529
4530	/ enable PCIE atomic ops /
4531	if (amdgpu_sriov_vf(adev)) {
4532	if (adev->virt.fw_reserve.p_pf2vf)
4533	adev->have_atomics_support = ((struct amd_sriov_msg_pf2vf_info *)
4534	adev->virt.fw_reserve.p_pf2vf)->pcie_atomic_ops_support_flags ==
4535	(PCI_EXP_DEVCAP2_ATOMIC_COMP32 \| PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4536	/ APUs w/ gfx9 onwards doesn't reply on PCIe atomics, rather it is a*
4537	* internal path natively support atomics, set have_atomics_support to true.
4538	*/
4539	} else if ((adev->flags & AMD_IS_APU) &&
4540	(amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) >
4541	IP_VERSION(`9`, `0`, `0`))) {
4542	adev->have_atomics_support = true;
4543	} else {
4544	adev->have_atomics_support =
4545	!pci_enable_atomic_ops_to_root(dev: adev->pdev,
4546	PCI_EXP_DEVCAP2_ATOMIC_COMP32 \|
4547	PCI_EXP_DEVCAP2_ATOMIC_COMP64);
4548	}
4549
4550	if (!adev->have_atomics_support)
4551	dev_info(adev->dev, "PCIE atomic ops is not supported\n");
4552
4553	/ doorbell bar mapping and doorbell index init/
4554	amdgpu_doorbell_init(adev);
4555
4556	if (amdgpu_emu_mode == `1`) {
4557	/ post the asic on emulation mode /
4558	emu_soc_asic_init(adev);
4559	goto fence_driver_init;
4560	}
4561
4562	amdgpu_reset_init(adev);
4563
4564	/ detect if we are with an SRIOV vbios /
4565	if (adev->bios)
4566	amdgpu_device_detect_sriov_bios(adev);
4567
4568	/ check if we need to reset the asic*
4569	* E.g., driver was not cleanly unloaded previously, etc.
4570	*/
4571	if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
4572	if (adev->gmc.xgmi.num_physical_nodes) {
4573	dev_info(adev->dev, "Pending hive reset.\n");
4574	amdgpu_set_init_level(adev,
4575	lvl: AMDGPU_INIT_LEVEL_MINIMAL_XGMI);
4576	} else if (amdgpu_ip_version(adev, ip: MP1_HWIP, inst: `0`) == IP_VERSION(`13`, `0`, `10`) &&
4577	!amdgpu_device_has_display_hardware(adev)) {
4578	r = psp_gpu_reset(adev);
4579	} else {
4580	tmp = amdgpu_reset_method;
4581	/ It should do a default reset when loading or reloading the driver,*
4582	* regardless of the module parameter reset_method.
4583	*/
4584	amdgpu_reset_method = AMD_RESET_METHOD_NONE;
4585	r = amdgpu_asic_reset(adev);
4586	amdgpu_reset_method = tmp;
4587	}
4588
4589	if (r) {
4590	dev_err(adev->dev, "asic reset on init failed\n");
4591	goto failed;
4592	}
4593	}
4594
4595	/ Post card if necessary /
4596	if (amdgpu_device_need_post(adev)) {
4597	if (!adev->bios) {
4598	dev_err(adev->dev, "no vBIOS found\n");
4599	r = -EINVAL;
4600	goto failed;
4601	}
4602	DRM_INFO("GPU posting now...\n");
4603	r = amdgpu_device_asic_init(adev);
4604	if (r) {
4605	dev_err(adev->dev, "gpu post error!\n");
4606	goto failed;
4607	}
4608	}
4609
4610	if (adev->bios) {
4611	if (adev->is_atom_fw) {
4612	/ Initialize clocks /
4613	r = amdgpu_atomfirmware_get_clock_info(adev);
4614	if (r) {
4615	dev_err(adev->dev, "amdgpu_atomfirmware_get_clock_info failed\n");
4616	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, error_flags: `0`, error_data: `0`);
4617	goto failed;
4618	}
4619	} else {
4620	/ Initialize clocks /
4621	r = amdgpu_atombios_get_clock_info(adev);
4622	if (r) {
4623	dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
4624	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL, error_flags: `0`, error_data: `0`);
4625	goto failed;
4626	}
4627	/ init i2c buses /
4628	amdgpu_i2c_init(adev);
4629	}
4630	}
4631
4632	fence_driver_init:
4633	/ Fence driver /
4634	r = amdgpu_fence_driver_sw_init(adev);
4635	if (r) {
4636	dev_err(adev->dev, "amdgpu_fence_driver_sw_init failed\n");
4637	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_FENCE_INIT_FAIL, error_flags: `0`, error_data: `0`);
4638	goto failed;
4639	}
4640
4641	/ init the mode config /
4642	drm_mode_config_init(dev: adev_to_drm(adev));
4643
4644	r = amdgpu_device_ip_init(adev);
4645	if (r) {
4646	dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
4647	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, error_flags: `0`, error_data: `0`);
4648	goto release_ras_con;
4649	}
4650
4651	amdgpu_fence_driver_hw_init(adev);
4652
4653	dev_info(adev->dev,
4654	"SE %d, SH per SE %d, CU per SH %d, active_cu_number %d\n",
4655	adev->gfx.config.max_shader_engines,
4656	adev->gfx.config.max_sh_per_se,
4657	adev->gfx.config.max_cu_per_sh,
4658	adev->gfx.cu_info.number);
4659
4660	adev->accel_working = true;
4661
4662	amdgpu_vm_check_compute_bug(adev);
4663
4664	/ Initialize the buffer migration limit. /
4665	if (amdgpu_moverate >= `0`)
4666	max_MBps = amdgpu_moverate;
4667	else
4668	max_MBps = `8`; / Allow 8 MB/s. /
4669	/ Get a log2 for easy divisions. /
4670	adev->mm_stats.log2_max_MBps = ilog2(max(`1u`, max_MBps));
4671
4672	/*
4673	* Register gpu instance before amdgpu_device_enable_mgpu_fan_boost.
4674	* Otherwise the mgpu fan boost feature will be skipped due to the
4675	* gpu instance is counted less.
4676	*/
4677	amdgpu_register_gpu_instance(adev);
4678
4679	/ enable clockgating, etc. after ib tests, etc. since some blocks require*
4680	* explicit gating rather than handling it automatically.
4681	*/
4682	if (adev->init_lvl->level != AMDGPU_INIT_LEVEL_MINIMAL_XGMI) {
4683	r = amdgpu_device_ip_late_init(adev);
4684	if (r) {
4685	dev_err(adev->dev, "amdgpu_device_ip_late_init failed\n");
4686	amdgpu_vf_error_put(adev, sub_error_code: AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL, error_flags: `0`, error_data: r);
4687	goto release_ras_con;
4688	}
4689	/ must succeed. /
4690	amdgpu_ras_resume(adev);
4691	queue_delayed_work(wq: system_wq, dwork: &adev->delayed_init_work,
4692	delay: msecs_to_jiffies(AMDGPU_RESUME_MS));
4693	}
4694
4695	if (amdgpu_sriov_vf(adev)) {
4696	amdgpu_virt_release_full_gpu(adev, init: true);
4697	flush_delayed_work(dwork: &adev->delayed_init_work);
4698	}
4699
4700	/*
4701	* Place those sysfs registering after `late_init`. As some of those
4702	* operations performed in `late_init` might affect the sysfs
4703	* interfaces creating.
4704	*/
4705	r = amdgpu_atombios_sysfs_init(adev);
4706	if (r)
4707	drm_err(&adev->ddev,
4708	"registering atombios sysfs failed (%d).\n", r);
4709
4710	r = amdgpu_pm_sysfs_init(adev);
4711	if (r)
4712	DRM_ERROR("registering pm sysfs failed (%d).\n", r);
4713
4714	r = amdgpu_ucode_sysfs_init(adev);
4715	if (r) {
4716	adev->ucode_sysfs_en = false;
4717	DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
4718	} else
4719	adev->ucode_sysfs_en = true;
4720
4721	r = amdgpu_device_attr_sysfs_init(adev);
4722	if (r)
4723	dev_err(adev->dev, "Could not create amdgpu device attr\n");
4724
4725	r = devm_device_add_group(dev: adev->dev, grp: &amdgpu_board_attrs_group);
4726	if (r)
4727	dev_err(adev->dev,
4728	"Could not create amdgpu board attributes\n");
4729
4730	amdgpu_fru_sysfs_init(adev);
4731	amdgpu_reg_state_sysfs_init(adev);
4732	amdgpu_xcp_sysfs_init(adev);
4733
4734	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4735	r = amdgpu_pmu_init(adev);
4736	if (r)
4737	dev_err(adev->dev, "amdgpu_pmu_init failed\n");
4738
4739	/ Have stored pci confspace at hand for restore in sudden PCI error /
4740	if (amdgpu_device_cache_pci_state(pdev: adev->pdev))
4741	pci_restore_state(dev: pdev);
4742
4743	/ if we have > 1 VGA cards, then disable the amdgpu VGA resources /
4744	/ this will fail for cards that aren't VGA class devices, just*
4745	* ignore it
4746	*/
4747	if ((adev->pdev->class >> `8`) == PCI_CLASS_DISPLAY_VGA)
4748	vga_client_register(pdev: adev->pdev, set_decode: amdgpu_device_vga_set_decode);
4749
4750	px = amdgpu_device_supports_px(dev: ddev);
4751
4752	if (px \|\| (!dev_is_removable(dev: &adev->pdev->dev) &&
4753	apple_gmux_detect(NULL, NULL)))
4754	vga_switcheroo_register_client(dev: adev->pdev,
4755	ops: &amdgpu_switcheroo_ops, driver_power_control: px);
4756
4757	if (px)
4758	vga_switcheroo_init_domain_pm_ops(dev: adev->dev, domain: &adev->vga_pm_domain);
4759
4760	if (adev->init_lvl->level == AMDGPU_INIT_LEVEL_MINIMAL_XGMI)
4761	amdgpu_xgmi_reset_on_init(adev);
4762
4763	amdgpu_device_check_iommu_direct_map(adev);
4764
4765	adev->pm_nb.notifier_call = amdgpu_device_pm_notifier;
4766	r = register_pm_notifier(nb: &adev->pm_nb);
4767	if (r)
4768	goto failed;
4769
4770	return `0`;
4771
4772	release_ras_con:
4773	if (amdgpu_sriov_vf(adev))
4774	amdgpu_virt_release_full_gpu(adev, init: true);
4775
4776	/ failed in exclusive mode due to timeout /
4777	if (amdgpu_sriov_vf(adev) &&
4778	!amdgpu_sriov_runtime(adev) &&
4779	amdgpu_virt_mmio_blocked(adev) &&
4780	!amdgpu_virt_wait_reset(adev)) {
4781	dev_err(adev->dev, "VF exclusive mode timeout\n");
4782	/ Don't send request since VF is inactive. /
4783	adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
4784	adev->virt.ops = NULL;
4785	r = -EAGAIN;
4786	}
4787	amdgpu_release_ras_context(adev);
4788
4789	failed:
4790	amdgpu_vf_error_trans_all(adev);
4791
4792	return r;
4793	}
4794
4795	static void amdgpu_device_unmap_mmio(struct amdgpu_device *adev)
4796	{
4797
4798	/ Clear all CPU mappings pointing to this device /
4799	unmap_mapping_range(mapping: adev->ddev.anon_inode->i_mapping, holebegin: `0`, holelen: `0`, even_cows: `1`);
4800
4801	/ Unmap all mapped bars - Doorbell, registers and VRAM /
4802	amdgpu_doorbell_fini(adev);
4803
4804	iounmap(addr: adev->rmmio);
4805	adev->rmmio = NULL;
4806	if (adev->mman.aper_base_kaddr)
4807	iounmap(addr: adev->mman.aper_base_kaddr);
4808	adev->mman.aper_base_kaddr = NULL;
4809
4810	/ Memory manager related /
4811	if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
4812	arch_phys_wc_del(handle: adev->gmc.vram_mtrr);
4813	arch_io_free_memtype_wc(start: adev->gmc.aper_base, size: adev->gmc.aper_size);
4814	}
4815	}
4816
4817	/**
4818	* amdgpu_device_fini_hw - tear down the driver
4819	*
4820	* @adev: amdgpu_device pointer
4821	*
4822	* Tear down the driver info (all asics).
4823	* Called at driver shutdown.
4824	*/
4825	void amdgpu_device_fini_hw(struct amdgpu_device *adev)
4826	{
4827	dev_info(adev->dev, "amdgpu: finishing device.\n");
4828	flush_delayed_work(dwork: &adev->delayed_init_work);
4829
4830	if (adev->mman.initialized)
4831	drain_workqueue(wq: adev->mman.bdev.wq);
4832	adev->shutdown = true;
4833
4834	unregister_pm_notifier(nb: &adev->pm_nb);
4835
4836	/ make sure IB test finished before entering exclusive mode*
4837	* to avoid preemption on IB test
4838	*/
4839	if (amdgpu_sriov_vf(adev)) {
4840	amdgpu_virt_request_full_gpu(adev, init: false);
4841	amdgpu_virt_fini_data_exchange(adev);
4842	}
4843
4844	/ disable all interrupts /
4845	amdgpu_irq_disable_all(adev);
4846	if (adev->mode_info.mode_config_initialized) {
4847	if (!drm_drv_uses_atomic_modeset(dev: adev_to_drm(adev)))
4848	drm_helper_force_disable_all(dev: adev_to_drm(adev));
4849	else
4850	drm_atomic_helper_shutdown(dev: adev_to_drm(adev));
4851	}
4852	amdgpu_fence_driver_hw_fini(adev);
4853
4854	if (adev->pm.sysfs_initialized)
4855	amdgpu_pm_sysfs_fini(adev);
4856	if (adev->ucode_sysfs_en)
4857	amdgpu_ucode_sysfs_fini(adev);
4858	amdgpu_device_attr_sysfs_fini(adev);
4859	amdgpu_fru_sysfs_fini(adev);
4860
4861	amdgpu_reg_state_sysfs_fini(adev);
4862	amdgpu_xcp_sysfs_fini(adev);
4863
4864	/ disable ras feature must before hw fini /
4865	amdgpu_ras_pre_fini(adev);
4866
4867	amdgpu_ttm_set_buffer_funcs_status(adev, enable: false);
4868
4869	amdgpu_device_ip_fini_early(adev);
4870
4871	amdgpu_irq_fini_hw(adev);
4872
4873	if (adev->mman.initialized)
4874	ttm_device_clear_dma_mappings(bdev: &adev->mman.bdev);
4875
4876	amdgpu_gart_dummy_page_fini(adev);
4877
4878	if (drm_dev_is_unplugged(dev: adev_to_drm(adev)))
4879	amdgpu_device_unmap_mmio(adev);
4880
4881	}
4882
4883	void amdgpu_device_fini_sw(struct amdgpu_device *adev)
4884	{
4885	int i, idx;
4886	bool px;
4887
4888	amdgpu_device_ip_fini(adev);
4889	amdgpu_fence_driver_sw_fini(adev);
4890	amdgpu_ucode_release(fw: &adev->firmware.gpu_info_fw);
4891	adev->accel_working = false;
4892	dma_fence_put(rcu_dereference_protected(adev->gang_submit, true));
4893	for (i = `0`; i < MAX_XCP; ++i) {
4894	dma_fence_put(fence: adev->isolation[i].spearhead);
4895	amdgpu_sync_free(sync: &adev->isolation[i].active);
4896	amdgpu_sync_free(sync: &adev->isolation[i].prev);
4897	}
4898
4899	amdgpu_reset_fini(adev);
4900
4901	/ free i2c buses /
4902	amdgpu_i2c_fini(adev);
4903
4904	if (adev->bios) {
4905	if (amdgpu_emu_mode != `1`)
4906	amdgpu_atombios_fini(adev);
4907	amdgpu_bios_release(adev);
4908	}
4909
4910	kfree(objp: adev->fru_info);
4911	adev->fru_info = NULL;
4912
4913	kfree(objp: adev->xcp_mgr);
4914	adev->xcp_mgr = NULL;
4915
4916	px = amdgpu_device_supports_px(dev: adev_to_drm(adev));
4917
4918	if (px \|\| (!dev_is_removable(dev: &adev->pdev->dev) &&
4919	apple_gmux_detect(NULL, NULL)))
4920	vga_switcheroo_unregister_client(dev: adev->pdev);
4921
4922	if (px)
4923	vga_switcheroo_fini_domain_pm_ops(dev: adev->dev);
4924
4925	if ((adev->pdev->class >> `8`) == PCI_CLASS_DISPLAY_VGA)
4926	vga_client_unregister(pdev: adev->pdev);
4927
4928	if (drm_dev_enter(dev: adev_to_drm(adev), idx: &idx)) {
4929
4930	iounmap(addr: adev->rmmio);
4931	adev->rmmio = NULL;
4932	drm_dev_exit(idx);
4933	}
4934
4935	if (IS_ENABLED(CONFIG_PERF_EVENTS))
4936	amdgpu_pmu_fini(adev);
4937	if (adev->mman.discovery_bin)
4938	amdgpu_discovery_fini(adev);
4939
4940	amdgpu_reset_put_reset_domain(domain: adev->reset_domain);
4941	adev->reset_domain = NULL;
4942
4943	kfree(objp: adev->pci_state);
4944
4945	}
4946
4947	/**
4948	* amdgpu_device_evict_resources - evict device resources
4949	* @adev: amdgpu device object
4950	*
4951	* Evicts all ttm device resources(vram BOs, gart table) from the lru list
4952	* of the vram memory type. Mainly used for evicting device resources
4953	* at suspend time.
4954	*
4955	*/
4956	static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
4957	{
4958	int ret;
4959
4960	/ No need to evict vram on APUs unless going to S4 /
4961	if (!adev->in_s4 && (adev->flags & AMD_IS_APU))
4962	return `0`;
4963
4964	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
4965	if (ret)
4966	DRM_WARN("evicting device resources failed\n");
4967	return ret;
4968	}
4969
4970	/*
4971	* Suspend & resume.
4972	*/
4973	/**
4974	* amdgpu_device_pm_notifier - Notification block for Suspend/Hibernate events
4975	* @nb: notifier block
4976	* @mode: suspend mode
4977	* @data: data
4978	*
4979	* This function is called when the system is about to suspend or hibernate.
4980	* It is used to set the appropriate flags so that eviction can be optimized
4981	* in the pm prepare callback.
4982	*/
4983	static int amdgpu_device_pm_notifier(struct notifier_block nb, unsigned* long mode,
4984	void *data)
4985	{
4986	struct amdgpu_device adev = container_of(nb, struct* amdgpu_device, pm_nb);
4987
4988	switch (mode) {
4989	case PM_HIBERNATION_PREPARE:
4990	adev->in_s4 = true;
4991	break;
4992	case PM_POST_HIBERNATION:
4993	adev->in_s4 = false;
4994	break;
4995	}
4996
4997	return NOTIFY_DONE;
4998	}
4999
5000	/**
5001	* amdgpu_device_prepare - prepare for device suspend
5002	*
5003	* @dev: drm dev pointer
5004	*
5005	* Prepare to put the hw in the suspend state (all asics).
5006	* Returns 0 for success or an error on failure.
5007	* Called at driver suspend.
5008	*/
5009	int amdgpu_device_prepare(struct drm_device *dev)
5010	{
5011	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
5012	int i, r;
5013
5014	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5015	return `0`;
5016
5017	/ Evict the majority of BOs before starting suspend sequence /
5018	r = amdgpu_device_evict_resources(adev);
5019	if (r)
5020	return r;
5021
5022	flush_delayed_work(dwork: &adev->gfx.gfx_off_delay_work);
5023
5024	for (i = `0`; i < adev->num_ip_blocks; i++) {
5025	if (!adev->ip_blocks[i].status.valid)
5026	continue;
5027	if (!adev->ip_blocks[i].version->funcs->prepare_suspend)
5028	continue;
5029	r = adev->ip_blocks[i].version->funcs->prepare_suspend(&adev->ip_blocks[i]);
5030	if (r)
5031	return r;
5032	}
5033
5034	return `0`;
5035	}
5036
5037	/**
5038	* amdgpu_device_suspend - initiate device suspend
5039	*
5040	* @dev: drm dev pointer
5041	* @notify_clients: notify in-kernel DRM clients
5042	*
5043	* Puts the hw in the suspend state (all asics).
5044	* Returns 0 for success or an error on failure.
5045	* Called at driver suspend.
5046	*/
5047	int amdgpu_device_suspend(struct drm_device *dev, bool notify_clients)
5048	{
5049	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
5050	int r = `0`;
5051
5052	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5053	return `0`;
5054
5055	adev->in_suspend = true;
5056
5057	if (amdgpu_sriov_vf(adev)) {
5058	amdgpu_virt_fini_data_exchange(adev);
5059	r = amdgpu_virt_request_full_gpu(adev, init: false);
5060	if (r)
5061	return r;
5062	}
5063
5064	if (amdgpu_acpi_smart_shift_update(dev, ss_state: AMDGPU_SS_DEV_D3))
5065	DRM_WARN("smart shift update failed\n");
5066
5067	if (notify_clients)
5068	drm_client_dev_suspend(dev: adev_to_drm(adev), holds_console_lock: false);
5069
5070	cancel_delayed_work_sync(dwork: &adev->delayed_init_work);
5071
5072	amdgpu_ras_suspend(adev);
5073
5074	amdgpu_device_ip_suspend_phase1(adev);
5075
5076	if (!adev->in_s0ix) {
5077	amdgpu_amdkfd_suspend(adev, run_pm: adev->in_runpm);
5078	amdgpu_userq_suspend(adev);
5079	}
5080
5081	r = amdgpu_device_evict_resources(adev);
5082	if (r)
5083	return r;
5084
5085	amdgpu_ttm_set_buffer_funcs_status(adev, enable: false);
5086
5087	amdgpu_fence_driver_hw_fini(adev);
5088
5089	amdgpu_device_ip_suspend_phase2(adev);
5090
5091	if (amdgpu_sriov_vf(adev))
5092	amdgpu_virt_release_full_gpu(adev, init: false);
5093
5094	r = amdgpu_dpm_notify_rlc_state(adev, en: false);
5095	if (r)
5096	return r;
5097
5098	return `0`;
5099	}
5100
5101	/**
5102	* amdgpu_device_resume - initiate device resume
5103	*
5104	* @dev: drm dev pointer
5105	* @notify_clients: notify in-kernel DRM clients
5106	*
5107	* Bring the hw back to operating state (all asics).
5108	* Returns 0 for success or an error on failure.
5109	* Called at driver resume.
5110	*/
5111	int amdgpu_device_resume(struct drm_device *dev, bool notify_clients)
5112	{
5113	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
5114	int r = `0`;
5115
5116	if (amdgpu_sriov_vf(adev)) {
5117	r = amdgpu_virt_request_full_gpu(adev, init: true);
5118	if (r)
5119	return r;
5120	}
5121
5122	if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
5123	return `0`;
5124
5125	if (adev->in_s0ix)
5126	amdgpu_dpm_gfx_state_change(adev, state: sGpuChangeState_D0Entry);
5127
5128	/ post card /
5129	if (amdgpu_device_need_post(adev)) {
5130	r = amdgpu_device_asic_init(adev);
5131	if (r)
5132	dev_err(adev->dev, "amdgpu asic init failed\n");
5133	}
5134
5135	r = amdgpu_device_ip_resume(adev);
5136
5137	if (r) {
5138	dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
5139	goto exit;
5140	}
5141
5142	if (!adev->in_s0ix) {
5143	r = amdgpu_amdkfd_resume(adev, run_pm: adev->in_runpm);
5144	if (r)
5145	goto exit;
5146
5147	r = amdgpu_userq_resume(adev);
5148	if (r)
5149	goto exit;
5150	}
5151
5152	r = amdgpu_device_ip_late_init(adev);
5153	if (r)
5154	goto exit;
5155
5156	queue_delayed_work(wq: system_wq, dwork: &adev->delayed_init_work,
5157	delay: msecs_to_jiffies(AMDGPU_RESUME_MS));
5158	exit:
5159	if (amdgpu_sriov_vf(adev)) {
5160	amdgpu_virt_init_data_exchange(adev);
5161	amdgpu_virt_release_full_gpu(adev, init: true);
5162	}
5163
5164	if (r)
5165	return r;
5166
5167	/ Make sure IB tests flushed /
5168	flush_delayed_work(dwork: &adev->delayed_init_work);
5169
5170	if (notify_clients)
5171	drm_client_dev_resume(dev: adev_to_drm(adev), holds_console_lock: false);
5172
5173	amdgpu_ras_resume(adev);
5174
5175	if (adev->mode_info.num_crtc) {
5176	/*
5177	* Most of the connector probing functions try to acquire runtime pm
5178	* refs to ensure that the GPU is powered on when connector polling is
5179	* performed. Since we're calling this from a runtime PM callback,
5180	* trying to acquire rpm refs will cause us to deadlock.
5181	*
5182	* Since we're guaranteed to be holding the rpm lock, it's safe to
5183	* temporarily disable the rpm helpers so this doesn't deadlock us.
5184	*/
5185	#ifdef CONFIG_PM
5186	dev->dev->power.disable_depth++;
5187	#endif
5188	if (!adev->dc_enabled)
5189	drm_helper_hpd_irq_event(dev);
5190	else
5191	drm_kms_helper_hotplug_event(dev);
5192	#ifdef CONFIG_PM
5193	dev->dev->power.disable_depth--;
5194	#endif
5195	}
5196	adev->in_suspend = false;
5197
5198	if (amdgpu_acpi_smart_shift_update(dev, ss_state: AMDGPU_SS_DEV_D0))
5199	DRM_WARN("smart shift update failed\n");
5200
5201	return `0`;
5202	}
5203
5204	/**
5205	* amdgpu_device_ip_check_soft_reset - did soft reset succeed
5206	*
5207	* @adev: amdgpu_device pointer
5208	*
5209	* The list of all the hardware IPs that make up the asic is walked and
5210	* the check_soft_reset callbacks are run. check_soft_reset determines
5211	* if the asic is still hung or not.
5212	* Returns true if any of the IPs are still in a hung state, false if not.
5213	*/
5214	static bool amdgpu_device_ip_check_soft_reset(struct amdgpu_device *adev)
5215	{
5216	int i;
5217	bool asic_hang = false;
5218
5219	if (amdgpu_sriov_vf(adev))
5220	return true;
5221
5222	if (amdgpu_asic_need_full_reset(adev))
5223	return true;
5224
5225	for (i = `0`; i < adev->num_ip_blocks; i++) {
5226	if (!adev->ip_blocks[i].status.valid)
5227	continue;
5228	if (adev->ip_blocks[i].version->funcs->check_soft_reset)
5229	adev->ip_blocks[i].status.hang =
5230	adev->ip_blocks[i].version->funcs->check_soft_reset(
5231	&adev->ip_blocks[i]);
5232	if (adev->ip_blocks[i].status.hang) {
5233	dev_info(adev->dev, "IP block:%s is hung!\n", adev->ip_blocks[i].version->funcs->name);
5234	asic_hang = true;
5235	}
5236	}
5237	return asic_hang;
5238	}
5239
5240	/**
5241	* amdgpu_device_ip_pre_soft_reset - prepare for soft reset
5242	*
5243	* @adev: amdgpu_device pointer
5244	*
5245	* The list of all the hardware IPs that make up the asic is walked and the
5246	* pre_soft_reset callbacks are run if the block is hung. pre_soft_reset
5247	* handles any IP specific hardware or software state changes that are
5248	* necessary for a soft reset to succeed.
5249	* Returns 0 on success, negative error code on failure.
5250	*/
5251	static int amdgpu_device_ip_pre_soft_reset(struct amdgpu_device *adev)
5252	{
5253	int i, r = `0`;
5254
5255	for (i = `0`; i < adev->num_ip_blocks; i++) {
5256	if (!adev->ip_blocks[i].status.valid)
5257	continue;
5258	if (adev->ip_blocks[i].status.hang &&
5259	adev->ip_blocks[i].version->funcs->pre_soft_reset) {
5260	r = adev->ip_blocks[i].version->funcs->pre_soft_reset(&adev->ip_blocks[i]);
5261	if (r)
5262	return r;
5263	}
5264	}
5265
5266	return `0`;
5267	}
5268
5269	/**
5270	* amdgpu_device_ip_need_full_reset - check if a full asic reset is needed
5271	*
5272	* @adev: amdgpu_device pointer
5273	*
5274	* Some hardware IPs cannot be soft reset. If they are hung, a full gpu
5275	* reset is necessary to recover.
5276	* Returns true if a full asic reset is required, false if not.
5277	*/
5278	static bool amdgpu_device_ip_need_full_reset(struct amdgpu_device *adev)
5279	{
5280	int i;
5281
5282	if (amdgpu_asic_need_full_reset(adev))
5283	return true;
5284
5285	for (i = `0`; i < adev->num_ip_blocks; i++) {
5286	if (!adev->ip_blocks[i].status.valid)
5287	continue;
5288	if ((adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) \|\|
5289	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) \|\|
5290	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_ACP) \|\|
5291	(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) \|\|
5292	adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) {
5293	if (adev->ip_blocks[i].status.hang) {
5294	dev_info(adev->dev, "Some block need full reset!\n");
5295	return true;
5296	}
5297	}
5298	}
5299	return false;
5300	}
5301
5302	/**
5303	* amdgpu_device_ip_soft_reset - do a soft reset
5304	*
5305	* @adev: amdgpu_device pointer
5306	*
5307	* The list of all the hardware IPs that make up the asic is walked and the
5308	* soft_reset callbacks are run if the block is hung. soft_reset handles any
5309	* IP specific hardware or software state changes that are necessary to soft
5310	* reset the IP.
5311	* Returns 0 on success, negative error code on failure.
5312	*/
5313	static int amdgpu_device_ip_soft_reset(struct amdgpu_device *adev)
5314	{
5315	int i, r = `0`;
5316
5317	for (i = `0`; i < adev->num_ip_blocks; i++) {
5318	if (!adev->ip_blocks[i].status.valid)
5319	continue;
5320	if (adev->ip_blocks[i].status.hang &&
5321	adev->ip_blocks[i].version->funcs->soft_reset) {
5322	r = adev->ip_blocks[i].version->funcs->soft_reset(&adev->ip_blocks[i]);
5323	if (r)
5324	return r;
5325	}
5326	}
5327
5328	return `0`;
5329	}
5330
5331	/**
5332	* amdgpu_device_ip_post_soft_reset - clean up from soft reset
5333	*
5334	* @adev: amdgpu_device pointer
5335	*
5336	* The list of all the hardware IPs that make up the asic is walked and the
5337	* post_soft_reset callbacks are run if the asic was hung. post_soft_reset
5338	* handles any IP specific hardware or software state changes that are
5339	* necessary after the IP has been soft reset.
5340	* Returns 0 on success, negative error code on failure.
5341	*/
5342	static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev)
5343	{
5344	int i, r = `0`;
5345
5346	for (i = `0`; i < adev->num_ip_blocks; i++) {
5347	if (!adev->ip_blocks[i].status.valid)
5348	continue;
5349	if (adev->ip_blocks[i].status.hang &&
5350	adev->ip_blocks[i].version->funcs->post_soft_reset)
5351	r = adev->ip_blocks[i].version->funcs->post_soft_reset(&adev->ip_blocks[i]);
5352	if (r)
5353	return r;
5354	}
5355
5356	return `0`;
5357	}
5358
5359	/**
5360	* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
5361	*
5362	* @adev: amdgpu_device pointer
5363	* @reset_context: amdgpu reset context pointer
5364	*
5365	* do VF FLR and reinitialize Asic
5366	* return 0 means succeeded otherwise failed
5367	*/
5368	static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
5369	struct amdgpu_reset_context *reset_context)
5370	{
5371	int r;
5372	struct amdgpu_hive_info *hive = NULL;
5373
5374	if (test_bit(AMDGPU_HOST_FLR, &reset_context->flags)) {
5375	if (!amdgpu_ras_get_fed_status(adev))
5376	amdgpu_virt_ready_to_reset(adev);
5377	amdgpu_virt_wait_reset(adev);
5378	clear_bit(nr: AMDGPU_HOST_FLR, addr: &reset_context->flags);
5379	r = amdgpu_virt_request_full_gpu(adev, init: true);
5380	} else {
5381	r = amdgpu_virt_reset_gpu(adev);
5382	}
5383	if (r)
5384	return r;
5385
5386	amdgpu_ras_clear_err_state(adev);
5387	amdgpu_irq_gpu_reset_resume_helper(adev);
5388
5389	/ some sw clean up VF needs to do before recover /
5390	amdgpu_virt_post_reset(adev);
5391
5392	/ Resume IP prior to SMC /
5393	r = amdgpu_device_ip_reinit_early_sriov(adev);
5394	if (r)
5395	return r;
5396
5397	amdgpu_virt_init_data_exchange(adev);
5398
5399	r = amdgpu_device_fw_loading(adev);
5400	if (r)
5401	return r;
5402
5403	/ now we are okay to resume SMC/CP/SDMA /
5404	r = amdgpu_device_ip_reinit_late_sriov(adev);
5405	if (r)
5406	return r;
5407
5408	hive = amdgpu_get_xgmi_hive(adev);
5409	/ Update PSP FW topology after reset /
5410	if (hive && adev->gmc.xgmi.num_physical_nodes > `1`)
5411	r = amdgpu_xgmi_update_topology(hive, adev);
5412	if (hive)
5413	amdgpu_put_xgmi_hive(hive);
5414	if (r)
5415	return r;
5416
5417	r = amdgpu_ib_ring_tests(adev);
5418	if (r)
5419	return r;
5420
5421	if (adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST)
5422	amdgpu_inc_vram_lost(adev);
5423
5424	/ need to be called during full access so we can't do it later like*
5425	* bare-metal does.
5426	*/
5427	amdgpu_amdkfd_post_reset(adev);
5428	amdgpu_virt_release_full_gpu(adev, init: true);
5429
5430	/ Aldebaran and gfx_11_0_3 support ras in SRIOV, so need resume ras during reset /
5431	if (amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `2`) \|\|
5432	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `3`) \|\|
5433	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `4`) \|\|
5434	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `5`, `0`) \|\|
5435	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`11`, `0`, `3`))
5436	amdgpu_ras_resume(adev);
5437
5438	amdgpu_virt_ras_telemetry_post_reset(adev);
5439
5440	return `0`;
5441	}
5442
5443	/**
5444	* amdgpu_device_has_job_running - check if there is any unfinished job
5445	*
5446	* @adev: amdgpu_device pointer
5447	*
5448	* check if there is any job running on the device when guest driver receives
5449	* FLR notification from host driver. If there are still jobs running, then
5450	* the guest driver will not respond the FLR reset. Instead, let the job hit
5451	* the timeout and guest driver then issue the reset request.
5452	*/
5453	bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
5454	{
5455	int i;
5456
5457	for (i = `0`; i < AMDGPU_MAX_RINGS; ++i) {
5458	struct amdgpu_ring *ring = adev->rings[i];
5459
5460	if (!amdgpu_ring_sched_ready(ring))
5461	continue;
5462
5463	if (amdgpu_fence_count_emitted(ring))
5464	return true;
5465	}
5466	return false;
5467	}
5468
5469	/**
5470	* amdgpu_device_should_recover_gpu - check if we should try GPU recovery
5471	*
5472	* @adev: amdgpu_device pointer
5473	*
5474	* Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover
5475	* a hung GPU.
5476	*/
5477	bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
5478	{
5479
5480	if (amdgpu_gpu_recovery == `0`)
5481	goto disabled;
5482
5483	/ Skip soft reset check in fatal error mode /
5484	if (!amdgpu_ras_is_poison_mode_supported(adev))
5485	return true;
5486
5487	if (amdgpu_sriov_vf(adev))
5488	return true;
5489
5490	if (amdgpu_gpu_recovery == -`1`) {
5491	switch (adev->asic_type) {
5492	#ifdef CONFIG_DRM_AMDGPU_SI
5493	case CHIP_VERDE:
5494	case CHIP_TAHITI:
5495	case CHIP_PITCAIRN:
5496	case CHIP_OLAND:
5497	case CHIP_HAINAN:
5498	#endif
5499	#ifdef CONFIG_DRM_AMDGPU_CIK
5500	case CHIP_KAVERI:
5501	case CHIP_KABINI:
5502	case CHIP_MULLINS:
5503	#endif
5504	case CHIP_CARRIZO:
5505	case CHIP_STONEY:
5506	case CHIP_CYAN_SKILLFISH:
5507	goto disabled;
5508	default:
5509	break;
5510	}
5511	}
5512
5513	return true;
5514
5515	disabled:
5516	dev_info(adev->dev, "GPU recovery disabled.\n");
5517	return false;
5518	}
5519
5520	int amdgpu_device_mode1_reset(struct amdgpu_device *adev)
5521	{
5522	u32 i;
5523	int ret = `0`;
5524
5525	if (adev->bios)
5526	amdgpu_atombios_scratch_regs_engine_hung(adev, hung: true);
5527
5528	dev_info(adev->dev, "GPU mode1 reset\n");
5529
5530	/ Cache the state before bus master disable. The saved config space*
5531	* values are used in other cases like restore after mode-2 reset.
5532	*/
5533	amdgpu_device_cache_pci_state(pdev: adev->pdev);
5534
5535	/ disable BM /
5536	pci_clear_master(dev: adev->pdev);
5537
5538	if (amdgpu_dpm_is_mode1_reset_supported(adev)) {
5539	dev_info(adev->dev, "GPU smu mode1 reset\n");
5540	ret = amdgpu_dpm_mode1_reset(adev);
5541	} else {
5542	dev_info(adev->dev, "GPU psp mode1 reset\n");
5543	ret = psp_gpu_reset(adev);
5544	}
5545
5546	if (ret)
5547	goto mode1_reset_failed;
5548
5549	amdgpu_device_load_pci_state(pdev: adev->pdev);
5550	ret = amdgpu_psp_wait_for_bootloader(adev);
5551	if (ret)
5552	goto mode1_reset_failed;
5553
5554	/ wait for asic to come out of reset /
5555	for (i = `0`; i < adev->usec_timeout; i++) {
5556	u32 memsize = adev->nbio.funcs->get_memsize(adev);
5557
5558	if (memsize != `0xffffffff`)
5559	break;
5560	udelay(usec: `1`);
5561	}
5562
5563	if (i >= adev->usec_timeout) {
5564	ret = -ETIMEDOUT;
5565	goto mode1_reset_failed;
5566	}
5567
5568	if (adev->bios)
5569	amdgpu_atombios_scratch_regs_engine_hung(adev, hung: false);
5570
5571	return `0`;
5572
5573	mode1_reset_failed:
5574	dev_err(adev->dev, "GPU mode1 reset failed\n");
5575	return ret;
5576	}
5577
5578	int amdgpu_device_link_reset(struct amdgpu_device *adev)
5579	{
5580	int ret = `0`;
5581
5582	dev_info(adev->dev, "GPU link reset\n");
5583
5584	if (!adev->pcie_reset_ctx.occurs_dpc)
5585	ret = amdgpu_dpm_link_reset(adev);
5586
5587	if (ret)
5588	goto link_reset_failed;
5589
5590	ret = amdgpu_psp_wait_for_bootloader(adev);
5591	if (ret)
5592	goto link_reset_failed;
5593
5594	return `0`;
5595
5596	link_reset_failed:
5597	dev_err(adev->dev, "GPU link reset failed\n");
5598	return ret;
5599	}
5600
5601	int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
5602	struct amdgpu_reset_context *reset_context)
5603	{
5604	int i, r = `0`;
5605	struct amdgpu_job *job = NULL;
5606	struct amdgpu_device *tmp_adev = reset_context->reset_req_dev;
5607	bool need_full_reset =
5608	test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5609
5610	if (reset_context->reset_req_dev == adev)
5611	job = reset_context->job;
5612
5613	if (amdgpu_sriov_vf(adev))
5614	amdgpu_virt_pre_reset(adev);
5615
5616	amdgpu_fence_driver_isr_toggle(adev, stop: true);
5617
5618	/ block all schedulers and reset given job's ring /
5619	for (i = `0`; i < AMDGPU_MAX_RINGS; ++i) {
5620	struct amdgpu_ring *ring = adev->rings[i];
5621
5622	if (!amdgpu_ring_sched_ready(ring))
5623	continue;
5624
5625	/ Clear job fence from fence drv to avoid force_completion*
5626	* leave NULL and vm flush fence in fence drv
5627	*/
5628	amdgpu_fence_driver_clear_job_fences(ring);
5629
5630	/ after all hw jobs are reset, hw fence is meaningless, so force_completion /
5631	amdgpu_fence_driver_force_completion(ring);
5632	}
5633
5634	amdgpu_fence_driver_isr_toggle(adev, stop: false);
5635
5636	if (job && job->vm)
5637	drm_sched_increase_karma(bad: &job->base);
5638
5639	r = amdgpu_reset_prepare_hwcontext(adev, reset_context);
5640	/ If reset handler not implemented, continue; otherwise return /
5641	if (r == -EOPNOTSUPP)
5642	r = `0`;
5643	else
5644	return r;
5645
5646	/ Don't suspend on bare metal if we are not going to HW reset the ASIC /
5647	if (!amdgpu_sriov_vf(adev)) {
5648
5649	if (!need_full_reset)
5650	need_full_reset = amdgpu_device_ip_need_full_reset(adev);
5651
5652	if (!need_full_reset && amdgpu_gpu_recovery &&
5653	amdgpu_device_ip_check_soft_reset(adev)) {
5654	amdgpu_device_ip_pre_soft_reset(adev);
5655	r = amdgpu_device_ip_soft_reset(adev);
5656	amdgpu_device_ip_post_soft_reset(adev);
5657	if (r \|\| amdgpu_device_ip_check_soft_reset(adev)) {
5658	dev_info(adev->dev, "soft reset failed, will fallback to full reset!\n");
5659	need_full_reset = true;
5660	}
5661	}
5662
5663	if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags)) {
5664	dev_info(tmp_adev->dev, "Dumping IP State\n");
5665	/ Trigger ip dump before we reset the asic /
5666	for (i = `0`; i < tmp_adev->num_ip_blocks; i++)
5667	if (tmp_adev->ip_blocks[i].version->funcs->dump_ip_state)
5668	tmp_adev->ip_blocks[i].version->funcs
5669	->dump_ip_state((void *)&tmp_adev->ip_blocks[i]);
5670	dev_info(tmp_adev->dev, "Dumping IP State Completed\n");
5671	}
5672
5673	if (need_full_reset)
5674	r = amdgpu_device_ip_suspend(adev);
5675	if (need_full_reset)
5676	set_bit(nr: AMDGPU_NEED_FULL_RESET, addr: &reset_context->flags);
5677	else
5678	clear_bit(nr: AMDGPU_NEED_FULL_RESET,
5679	addr: &reset_context->flags);
5680	}
5681
5682	return r;
5683	}
5684
5685	int amdgpu_device_reinit_after_reset(struct amdgpu_reset_context *reset_context)
5686	{
5687	struct list_head *device_list_handle;
5688	bool full_reset, vram_lost = false;
5689	struct amdgpu_device *tmp_adev;
5690	int r, init_level;
5691
5692	device_list_handle = reset_context->reset_device_list;
5693
5694	if (!device_list_handle)
5695	return -EINVAL;
5696
5697	full_reset = test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5698
5699	/**
5700	* If it's reset on init, it's default init level, otherwise keep level
5701	* as recovery level.
5702	*/
5703	if (reset_context->method == AMD_RESET_METHOD_ON_INIT)
5704	init_level = AMDGPU_INIT_LEVEL_DEFAULT;
5705	else
5706	init_level = AMDGPU_INIT_LEVEL_RESET_RECOVERY;
5707
5708	r = `0`;
5709	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5710	amdgpu_set_init_level(adev: tmp_adev, lvl: init_level);
5711	if (full_reset) {
5712	/ post card /
5713	amdgpu_ras_clear_err_state(adev: tmp_adev);
5714	r = amdgpu_device_asic_init(adev: tmp_adev);
5715	if (r) {
5716	dev_warn(tmp_adev->dev, "asic atom init failed!");
5717	} else {
5718	dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n");
5719
5720	r = amdgpu_device_ip_resume_phase1(adev: tmp_adev);
5721	if (r)
5722	goto out;
5723
5724	vram_lost = amdgpu_device_check_vram_lost(adev: tmp_adev);
5725
5726	if (!test_bit(AMDGPU_SKIP_COREDUMP, &reset_context->flags))
5727	amdgpu_coredump(adev: tmp_adev, skip_vram_check: false, vram_lost, job: reset_context->job);
5728
5729	if (vram_lost) {
5730	DRM_INFO("VRAM is lost due to GPU reset!\n");
5731	amdgpu_inc_vram_lost(tmp_adev);
5732	}
5733
5734	r = amdgpu_device_fw_loading(adev: tmp_adev);
5735	if (r)
5736	return r;
5737
5738	r = amdgpu_xcp_restore_partition_mode(
5739	xcp_mgr: tmp_adev->xcp_mgr);
5740	if (r)
5741	goto out;
5742
5743	r = amdgpu_device_ip_resume_phase2(adev: tmp_adev);
5744	if (r)
5745	goto out;
5746
5747	if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
5748	amdgpu_ttm_set_buffer_funcs_status(adev: tmp_adev, enable: true);
5749
5750	r = amdgpu_device_ip_resume_phase3(adev: tmp_adev);
5751	if (r)
5752	goto out;
5753
5754	if (vram_lost)
5755	amdgpu_device_fill_reset_magic(adev: tmp_adev);
5756
5757	/*
5758	* Add this ASIC as tracked as reset was already
5759	* complete successfully.
5760	*/
5761	amdgpu_register_gpu_instance(adev: tmp_adev);
5762
5763	if (!reset_context->hive &&
5764	tmp_adev->gmc.xgmi.num_physical_nodes > `1`)
5765	amdgpu_xgmi_add_device(adev: tmp_adev);
5766
5767	r = amdgpu_device_ip_late_init(adev: tmp_adev);
5768	if (r)
5769	goto out;
5770
5771	drm_client_dev_resume(dev: adev_to_drm(adev: tmp_adev), holds_console_lock: false);
5772
5773	/*
5774	* The GPU enters bad state once faulty pages
5775	* by ECC has reached the threshold, and ras
5776	* recovery is scheduled next. So add one check
5777	* here to break recovery if it indeed exceeds
5778	* bad page threshold, and remind user to
5779	* retire this GPU or setting one bigger
5780	* bad_page_threshold value to fix this once
5781	* probing driver again.
5782	*/
5783	if (!amdgpu_ras_is_rma(adev: tmp_adev)) {
5784	/ must succeed. /
5785	amdgpu_ras_resume(adev: tmp_adev);
5786	} else {
5787	r = -EINVAL;
5788	goto out;
5789	}
5790
5791	/ Update PSP FW topology after reset /
5792	if (reset_context->hive &&
5793	tmp_adev->gmc.xgmi.num_physical_nodes > `1`)
5794	r = amdgpu_xgmi_update_topology(
5795	hive: reset_context->hive, adev: tmp_adev);
5796	}
5797	}
5798
5799	out:
5800	if (!r) {
5801	/ IP init is complete now, set level as default /
5802	amdgpu_set_init_level(adev: tmp_adev,
5803	lvl: AMDGPU_INIT_LEVEL_DEFAULT);
5804	amdgpu_irq_gpu_reset_resume_helper(adev: tmp_adev);
5805	r = amdgpu_ib_ring_tests(adev: tmp_adev);
5806	if (r) {
5807	dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r);
5808	r = -EAGAIN;
5809	goto end;
5810	}
5811	}
5812
5813	if (r)
5814	tmp_adev->asic_reset_res = r;
5815	}
5816
5817	end:
5818	return r;
5819	}
5820
5821	int amdgpu_do_asic_reset(struct list_head *device_list_handle,
5822	struct amdgpu_reset_context *reset_context)
5823	{
5824	struct amdgpu_device *tmp_adev = NULL;
5825	bool need_full_reset, skip_hw_reset;
5826	int r = `0`;
5827
5828	/ Try reset handler method first /
5829	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
5830	reset_list);
5831
5832	reset_context->reset_device_list = device_list_handle;
5833	r = amdgpu_reset_perform_reset(adev: tmp_adev, reset_context);
5834	/ If reset handler not implemented, continue; otherwise return /
5835	if (r == -EOPNOTSUPP)
5836	r = `0`;
5837	else
5838	return r;
5839
5840	/ Reset handler not implemented, use the default method /
5841	need_full_reset =
5842	test_bit(AMDGPU_NEED_FULL_RESET, &reset_context->flags);
5843	skip_hw_reset = test_bit(AMDGPU_SKIP_HW_RESET, &reset_context->flags);
5844
5845	/*
5846	* ASIC reset has to be done on all XGMI hive nodes ASAP
5847	* to allow proper links negotiation in FW (within 1 sec)
5848	*/
5849	if (!skip_hw_reset && need_full_reset) {
5850	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5851	/ For XGMI run all resets in parallel to speed up the process /
5852	if (tmp_adev->gmc.xgmi.num_physical_nodes > `1`) {
5853	if (!queue_work(wq: system_unbound_wq,
5854	work: &tmp_adev->xgmi_reset_work))
5855	r = -EALREADY;
5856	} else
5857	r = amdgpu_asic_reset(tmp_adev);
5858
5859	if (r) {
5860	dev_err(tmp_adev->dev,
5861	"ASIC reset failed with error, %d for drm dev, %s",
5862	r, adev_to_drm(tmp_adev)->unique);
5863	goto out;
5864	}
5865	}
5866
5867	/ For XGMI wait for all resets to complete before proceed /
5868	if (!r) {
5869	list_for_each_entry(tmp_adev, device_list_handle,
5870	reset_list) {
5871	if (tmp_adev->gmc.xgmi.num_physical_nodes > `1`) {
5872	flush_work(work: &tmp_adev->xgmi_reset_work);
5873	r = tmp_adev->asic_reset_res;
5874	if (r)
5875	break;
5876	}
5877	}
5878	}
5879	}
5880
5881	if (!r && amdgpu_ras_intr_triggered()) {
5882	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
5883	amdgpu_ras_reset_error_count(adev: tmp_adev,
5884	block: AMDGPU_RAS_BLOCK__MMHUB);
5885	}
5886
5887	amdgpu_ras_intr_cleared();
5888	}
5889
5890	r = amdgpu_device_reinit_after_reset(reset_context);
5891	if (r == -EAGAIN)
5892	set_bit(nr: AMDGPU_NEED_FULL_RESET, addr: &reset_context->flags);
5893	else
5894	clear_bit(nr: AMDGPU_NEED_FULL_RESET, addr: &reset_context->flags);
5895
5896	out:
5897	return r;
5898	}
5899
5900	static void amdgpu_device_set_mp1_state(struct amdgpu_device *adev)
5901	{
5902
5903	switch (amdgpu_asic_reset_method(adev)) {
5904	case AMD_RESET_METHOD_MODE1:
5905	case AMD_RESET_METHOD_LINK:
5906	adev->mp1_state = PP_MP1_STATE_SHUTDOWN;
5907	break;
5908	case AMD_RESET_METHOD_MODE2:
5909	adev->mp1_state = PP_MP1_STATE_RESET;
5910	break;
5911	default:
5912	adev->mp1_state = PP_MP1_STATE_NONE;
5913	break;
5914	}
5915	}
5916
5917	static void amdgpu_device_unset_mp1_state(struct amdgpu_device *adev)
5918	{
5919	amdgpu_vf_error_trans_all(adev);
5920	adev->mp1_state = PP_MP1_STATE_NONE;
5921	}
5922
5923	static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
5924	{
5925	struct pci_dev *p = NULL;
5926
5927	p = pci_get_domain_bus_and_slot(domain: pci_domain_nr(bus: adev->pdev->bus),
5928	bus: adev->pdev->bus->number, devfn: `1`);
5929	if (p) {
5930	pm_runtime_enable(dev: &(p->dev));
5931	pm_runtime_resume(dev: &(p->dev));
5932	}
5933
5934	pci_dev_put(dev: p);
5935	}
5936
5937	static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
5938	{
5939	enum amd_reset_method reset_method;
5940	struct pci_dev *p = NULL;
5941	u64 expires;
5942
5943	/*
5944	* For now, only BACO and mode1 reset are confirmed
5945	* to suffer the audio issue without proper suspended.
5946	*/
5947	reset_method = amdgpu_asic_reset_method(adev);
5948	if ((reset_method != AMD_RESET_METHOD_BACO) &&
5949	(reset_method != AMD_RESET_METHOD_MODE1))
5950	return -EINVAL;
5951
5952	p = pci_get_domain_bus_and_slot(domain: pci_domain_nr(bus: adev->pdev->bus),
5953	bus: adev->pdev->bus->number, devfn: `1`);
5954	if (!p)
5955	return -ENODEV;
5956
5957	expires = pm_runtime_autosuspend_expiration(dev: &(p->dev));
5958	if (!expires)
5959	/*
5960	* If we cannot get the audio device autosuspend delay,
5961	* a fixed 4S interval will be used. Considering 3S is
5962	* the audio controller default autosuspend delay setting.
5963	* 4S used here is guaranteed to cover that.
5964	*/
5965	expires = ktime_get_mono_fast_ns() + NSEC_PER_SEC * `4ULL`;
5966
5967	while (!pm_runtime_status_suspended(dev: &(p->dev))) {
5968	if (!pm_runtime_suspend(dev: &(p->dev)))
5969	break;
5970
5971	if (expires < ktime_get_mono_fast_ns()) {
5972	dev_warn(adev->dev, "failed to suspend display audio\n");
5973	pci_dev_put(dev: p);
5974	/ TODO: abort the succeeding gpu reset? /
5975	return -ETIMEDOUT;
5976	}
5977	}
5978
5979	pm_runtime_disable(dev: &(p->dev));
5980
5981	pci_dev_put(dev: p);
5982	return `0`;
5983	}
5984
5985	static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
5986	{
5987	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
5988
5989	#if defined(CONFIG_DEBUG_FS)
5990	if (!amdgpu_sriov_vf(adev))
5991	cancel_work(work: &adev->reset_work);
5992	#endif
5993
5994	if (adev->kfd.dev)
5995	cancel_work(work: &adev->kfd.reset_work);
5996
5997	if (amdgpu_sriov_vf(adev))
5998	cancel_work(work: &adev->virt.flr_work);
5999
6000	if (con && adev->ras_enabled)
6001	cancel_work(work: &con->recovery_work);
6002
6003	}
6004
6005	static int amdgpu_device_health_check(struct list_head *device_list_handle)
6006	{
6007	struct amdgpu_device *tmp_adev;
6008	int ret = `0`;
6009	u32 status;
6010
6011	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6012	pci_read_config_dword(dev: tmp_adev->pdev, PCI_COMMAND, val: &status);
6013	if (PCI_POSSIBLE_ERROR(status)) {
6014	dev_err(tmp_adev->dev, "device lost from bus!");
6015	ret = -ENODEV;
6016	}
6017	}
6018
6019	return ret;
6020	}
6021
6022	static int amdgpu_device_halt_activities(struct amdgpu_device *adev,
6023	struct amdgpu_job *job,
6024	struct amdgpu_reset_context *reset_context,
6025	struct list_head *device_list,
6026	struct amdgpu_hive_info *hive,
6027	bool need_emergency_restart)
6028	{
6029	struct list_head *device_list_handle = NULL;
6030	struct amdgpu_device *tmp_adev = NULL;
6031	int i, r = `0`;
6032
6033	/*
6034	* Build list of devices to reset.
6035	* In case we are in XGMI hive mode, resort the device list
6036	* to put adev in the 1st position.
6037	*/
6038	if (!amdgpu_sriov_vf(adev) && (adev->gmc.xgmi.num_physical_nodes > `1`) && hive) {
6039	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6040	list_add_tail(new: &tmp_adev->reset_list, head: device_list);
6041	if (adev->shutdown)
6042	tmp_adev->shutdown = true;
6043	if (adev->pcie_reset_ctx.occurs_dpc)
6044	tmp_adev->pcie_reset_ctx.in_link_reset = true;
6045	}
6046	if (!list_is_first(list: &adev->reset_list, head: device_list))
6047	list_rotate_to_front(list: &adev->reset_list, head: device_list);
6048	device_list_handle = device_list;
6049	} else {
6050	list_add_tail(new: &adev->reset_list, head: device_list);
6051	device_list_handle = device_list;
6052	}
6053
6054	if (!amdgpu_sriov_vf(adev) && (!adev->pcie_reset_ctx.occurs_dpc)) {
6055	r = amdgpu_device_health_check(device_list_handle);
6056	if (r)
6057	return r;
6058	}
6059
6060	/ We need to lock reset domain only once both for XGMI and single device /
6061	tmp_adev = list_first_entry(device_list_handle, struct amdgpu_device,
6062	reset_list);
6063	amdgpu_device_lock_reset_domain(reset_domain: tmp_adev->reset_domain);
6064
6065	/ block all schedulers and reset given job's ring /
6066	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
6067
6068	amdgpu_device_set_mp1_state(adev: tmp_adev);
6069
6070	/*
6071	* Try to put the audio codec into suspend state
6072	* before gpu reset started.
6073	*
6074	* Due to the power domain of the graphics device
6075	* is shared with AZ power domain. Without this,
6076	* we may change the audio hardware from behind
6077	* the audio driver's back. That will trigger
6078	* some audio codec errors.
6079	*/
6080	if (!amdgpu_device_suspend_display_audio(adev: tmp_adev))
6081	tmp_adev->pcie_reset_ctx.audio_suspended = true;
6082
6083	amdgpu_ras_set_error_query_ready(adev: tmp_adev, ready: false);
6084
6085	cancel_delayed_work_sync(dwork: &tmp_adev->delayed_init_work);
6086
6087	amdgpu_amdkfd_pre_reset(adev: tmp_adev, reset_context);
6088
6089	/*
6090	* Mark these ASICs to be reset as untracked first
6091	* And add them back after reset completed
6092	*/
6093	amdgpu_unregister_gpu_instance(adev: tmp_adev);
6094
6095	drm_client_dev_suspend(dev: adev_to_drm(adev: tmp_adev), holds_console_lock: false);
6096
6097	/ disable ras on ALL IPs /
6098	if (!need_emergency_restart &&
6099	(!adev->pcie_reset_ctx.occurs_dpc) &&
6100	amdgpu_device_ip_need_full_reset(adev: tmp_adev))
6101	amdgpu_ras_suspend(adev: tmp_adev);
6102
6103	for (i = `0`; i < AMDGPU_MAX_RINGS; ++i) {
6104	struct amdgpu_ring *ring = tmp_adev->rings[i];
6105
6106	if (!amdgpu_ring_sched_ready(ring))
6107	continue;
6108
6109	drm_sched_stop(sched: &ring->sched, bad: job ? &job->base : NULL);
6110
6111	if (need_emergency_restart)
6112	amdgpu_job_stop_all_jobs_on_sched(sched: &ring->sched);
6113	}
6114	atomic_inc(v: &tmp_adev->gpu_reset_counter);
6115	}
6116
6117	return r;
6118	}
6119
6120	static int amdgpu_device_asic_reset(struct amdgpu_device *adev,
6121	struct list_head *device_list,
6122	struct amdgpu_reset_context *reset_context)
6123	{
6124	struct amdgpu_device *tmp_adev = NULL;
6125	int retry_limit = AMDGPU_MAX_RETRY_LIMIT;
6126	int r = `0`;
6127
6128	retry: / Rest of adevs pre asic reset from XGMI hive. /
6129	list_for_each_entry(tmp_adev, device_list, reset_list) {
6130	if (adev->pcie_reset_ctx.occurs_dpc)
6131	tmp_adev->no_hw_access = true;
6132	r = amdgpu_device_pre_asic_reset(adev: tmp_adev, reset_context);
6133	if (adev->pcie_reset_ctx.occurs_dpc)
6134	tmp_adev->no_hw_access = false;
6135	/TODO Should we stop ?/
6136	if (r) {
6137	dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
6138	r, adev_to_drm(tmp_adev)->unique);
6139	tmp_adev->asic_reset_res = r;
6140	}
6141	}
6142
6143	/ Actual ASIC resets if needed./
6144	/ Host driver will handle XGMI hive reset for SRIOV /
6145	if (amdgpu_sriov_vf(adev)) {
6146
6147	/ Bail out of reset early /
6148	if (amdgpu_ras_is_rma(adev))
6149	return -ENODEV;
6150
6151	if (amdgpu_ras_get_fed_status(adev) \|\| amdgpu_virt_rcvd_ras_interrupt(adev)) {
6152	dev_dbg(adev->dev, "Detected RAS error, wait for FLR completion\n");
6153	amdgpu_ras_set_fed(adev, status: true);
6154	set_bit(nr: AMDGPU_HOST_FLR, addr: &reset_context->flags);
6155	}
6156
6157	r = amdgpu_device_reset_sriov(adev, reset_context);
6158	if (AMDGPU_RETRY_SRIOV_RESET(r) && (retry_limit--) > `0`) {
6159	amdgpu_virt_release_full_gpu(adev, init: true);
6160	goto retry;
6161	}
6162	if (r)
6163	adev->asic_reset_res = r;
6164	} else {
6165	r = amdgpu_do_asic_reset(device_list_handle: device_list, reset_context);
6166	if (r && r == -EAGAIN)
6167	goto retry;
6168	}
6169
6170	list_for_each_entry(tmp_adev, device_list, reset_list) {
6171	/*
6172	* Drop any pending non scheduler resets queued before reset is done.
6173	* Any reset scheduled after this point would be valid. Scheduler resets
6174	* were already dropped during drm_sched_stop and no new ones can come
6175	* in before drm_sched_start.
6176	*/
6177	amdgpu_device_stop_pending_resets(adev: tmp_adev);
6178	}
6179
6180	return r;
6181	}
6182
6183	static int amdgpu_device_sched_resume(struct list_head *device_list,
6184	struct amdgpu_reset_context *reset_context,
6185	bool job_signaled)
6186	{
6187	struct amdgpu_device *tmp_adev = NULL;
6188	int i, r = `0`;
6189
6190	/ Post ASIC reset for all devs ./
6191	list_for_each_entry(tmp_adev, device_list, reset_list) {
6192
6193	for (i = `0`; i < AMDGPU_MAX_RINGS; ++i) {
6194	struct amdgpu_ring *ring = tmp_adev->rings[i];
6195
6196	if (!amdgpu_ring_sched_ready(ring))
6197	continue;
6198
6199	drm_sched_start(sched: &ring->sched, errno: `0`);
6200	}
6201
6202	if (!drm_drv_uses_atomic_modeset(dev: adev_to_drm(adev: tmp_adev)) && !job_signaled)
6203	drm_helper_resume_force_mode(dev: adev_to_drm(adev: tmp_adev));
6204
6205	if (tmp_adev->asic_reset_res)
6206	r = tmp_adev->asic_reset_res;
6207
6208	tmp_adev->asic_reset_res = `0`;
6209
6210	if (r) {
6211	/ bad news, how to tell it to userspace ?*
6212	* for ras error, we should report GPU bad status instead of
6213	* reset failure
6214	*/
6215	if (reset_context->src != AMDGPU_RESET_SRC_RAS \|\|
6216	!amdgpu_ras_eeprom_check_err_threshold(adev: tmp_adev))
6217	dev_info(tmp_adev->dev, "GPU reset(%d) failed\n",
6218	atomic_read(&tmp_adev->gpu_reset_counter));
6219	amdgpu_vf_error_put(adev: tmp_adev, sub_error_code: AMDGIM_ERROR_VF_GPU_RESET_FAIL, error_flags: `0`, error_data: r);
6220	} else {
6221	dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&tmp_adev->gpu_reset_counter));
6222	if (amdgpu_acpi_smart_shift_update(dev: adev_to_drm(adev: tmp_adev), ss_state: AMDGPU_SS_DEV_D0))
6223	DRM_WARN("smart shift update failed\n");
6224	}
6225	}
6226
6227	return r;
6228	}
6229
6230	static void amdgpu_device_gpu_resume(struct amdgpu_device *adev,
6231	struct list_head *device_list,
6232	bool need_emergency_restart)
6233	{
6234	struct amdgpu_device *tmp_adev = NULL;
6235
6236	list_for_each_entry(tmp_adev, device_list, reset_list) {
6237	/ unlock kfd: SRIOV would do it separately /
6238	if (!need_emergency_restart && !amdgpu_sriov_vf(tmp_adev))
6239	amdgpu_amdkfd_post_reset(adev: tmp_adev);
6240
6241	/ kfd_post_reset will do nothing if kfd device is not initialized,*
6242	* need to bring up kfd here if it's not be initialized before
6243	*/
6244	if (!adev->kfd.init_complete)
6245	amdgpu_amdkfd_device_init(adev);
6246
6247	if (tmp_adev->pcie_reset_ctx.audio_suspended)
6248	amdgpu_device_resume_display_audio(adev: tmp_adev);
6249
6250	amdgpu_device_unset_mp1_state(adev: tmp_adev);
6251
6252	amdgpu_ras_set_error_query_ready(adev: tmp_adev, ready: true);
6253
6254	}
6255
6256	tmp_adev = list_first_entry(device_list, struct amdgpu_device,
6257	reset_list);
6258	amdgpu_device_unlock_reset_domain(reset_domain: tmp_adev->reset_domain);
6259
6260	}
6261
6262
6263	/**
6264	* amdgpu_device_gpu_recover - reset the asic and recover scheduler
6265	*
6266	* @adev: amdgpu_device pointer
6267	* @job: which job trigger hang
6268	* @reset_context: amdgpu reset context pointer
6269	*
6270	* Attempt to reset the GPU if it has hung (all asics).
6271	* Attempt to do soft-reset or full-reset and reinitialize Asic
6272	* Returns 0 for success or an error on failure.
6273	*/
6274
6275	int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
6276	struct amdgpu_job *job,
6277	struct amdgpu_reset_context *reset_context)
6278	{
6279	struct list_head device_list;
6280	bool job_signaled = false;
6281	struct amdgpu_hive_info *hive = NULL;
6282	int r = `0`;
6283	bool need_emergency_restart = false;
6284
6285	/*
6286	* If it reaches here because of hang/timeout and a RAS error is
6287	* detected at the same time, let RAS recovery take care of it.
6288	*/
6289	if (amdgpu_ras_is_err_state(adev, block: AMDGPU_RAS_BLOCK__ANY) &&
6290	!amdgpu_sriov_vf(adev) &&
6291	reset_context->src != AMDGPU_RESET_SRC_RAS) {
6292	dev_dbg(adev->dev,
6293	"Gpu recovery from source: %d yielding to RAS error recovery handling",
6294	reset_context->src);
6295	return `0`;
6296	}
6297
6298	/*
6299	* Special case: RAS triggered and full reset isn't supported
6300	*/
6301	need_emergency_restart = amdgpu_ras_need_emergency_restart(adev);
6302
6303	/*
6304	* Flush RAM to disk so that after reboot
6305	* the user can read log and see why the system rebooted.
6306	*/
6307	if (need_emergency_restart && amdgpu_ras_get_context(adev) &&
6308	amdgpu_ras_get_context(adev)->reboot) {
6309	DRM_WARN("Emergency reboot.");
6310
6311	ksys_sync_helper();
6312	emergency_restart();
6313	}
6314
6315	dev_info(adev->dev, "GPU %s begin!\n",
6316	need_emergency_restart ? "jobs stop":"reset");
6317
6318	if (!amdgpu_sriov_vf(adev))
6319	hive = amdgpu_get_xgmi_hive(adev);
6320	if (hive)
6321	mutex_lock(&hive->hive_lock);
6322
6323	reset_context->job = job;
6324	reset_context->hive = hive;
6325	INIT_LIST_HEAD(list: &device_list);
6326
6327	r = amdgpu_device_halt_activities(adev, job, reset_context, device_list: &device_list,
6328	hive, need_emergency_restart);
6329	if (r)
6330	goto end_reset;
6331
6332	if (need_emergency_restart)
6333	goto skip_sched_resume;
6334	/*
6335	* Must check guilty signal here since after this point all old
6336	* HW fences are force signaled.
6337	*
6338	* job->base holds a reference to parent fence
6339	*/
6340	if (job && dma_fence_is_signaled(fence: &job->hw_fence)) {
6341	job_signaled = true;
6342	dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
6343	goto skip_hw_reset;
6344	}
6345
6346	r = amdgpu_device_asic_reset(adev, device_list: &device_list, reset_context);
6347	if (r)
6348	goto end_reset;
6349	skip_hw_reset:
6350	r = amdgpu_device_sched_resume(device_list: &device_list, reset_context, job_signaled);
6351	if (r)
6352	goto end_reset;
6353	skip_sched_resume:
6354	amdgpu_device_gpu_resume(adev, device_list: &device_list, need_emergency_restart);
6355	end_reset:
6356	if (hive) {
6357	mutex_unlock(lock: &hive->hive_lock);
6358	amdgpu_put_xgmi_hive(hive);
6359	}
6360
6361	if (r)
6362	dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
6363
6364	atomic_set(v: &adev->reset_domain->reset_res, i: r);
6365
6366	if (!r)
6367	drm_dev_wedged_event(dev: adev_to_drm(adev), DRM_WEDGE_RECOVERY_NONE);
6368
6369	return r;
6370	}
6371
6372	/**
6373	* amdgpu_device_partner_bandwidth - find the bandwidth of appropriate partner
6374	*
6375	* @adev: amdgpu_device pointer
6376	* @speed: pointer to the speed of the link
6377	* @width: pointer to the width of the link
6378	*
6379	* Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6380	* first physical partner to an AMD dGPU.
6381	* This will exclude any virtual switches and links.
6382	*/
6383	static void amdgpu_device_partner_bandwidth(struct amdgpu_device *adev,
6384	enum pci_bus_speed *speed,
6385	enum pcie_link_width *width)
6386	{
6387	struct pci_dev *parent = adev->pdev;
6388
6389	if (!speed \|\| !width)
6390	return;
6391
6392	*speed = PCI_SPEED_UNKNOWN;
6393	*width = PCIE_LNK_WIDTH_UNKNOWN;
6394
6395	if (amdgpu_device_pcie_dynamic_switching_supported(adev)) {
6396	while ((parent = pci_upstream_bridge(dev: parent))) {
6397	/ skip upstream/downstream switches internal to dGPU/
6398	if (parent->vendor == PCI_VENDOR_ID_ATI)
6399	continue;
6400	*speed = pcie_get_speed_cap(dev: parent);
6401	*width = pcie_get_width_cap(dev: parent);
6402	break;
6403	}
6404	} else {
6405	/ use the current speeds rather than max if switching is not supported /
6406	pcie_bandwidth_available(dev: adev->pdev, NULL, speed, width);
6407	}
6408	}
6409
6410	/**
6411	* amdgpu_device_gpu_bandwidth - find the bandwidth of the GPU
6412	*
6413	* @adev: amdgpu_device pointer
6414	* @speed: pointer to the speed of the link
6415	* @width: pointer to the width of the link
6416	*
6417	* Evaluate the hierarchy to find the speed and bandwidth capabilities of the
6418	* AMD dGPU which may be a virtual upstream bridge.
6419	*/
6420	static void amdgpu_device_gpu_bandwidth(struct amdgpu_device *adev,
6421	enum pci_bus_speed *speed,
6422	enum pcie_link_width *width)
6423	{
6424	struct pci_dev *parent = adev->pdev;
6425
6426	if (!speed \|\| !width)
6427	return;
6428
6429	parent = pci_upstream_bridge(dev: parent);
6430	if (parent && parent->vendor == PCI_VENDOR_ID_ATI) {
6431	/ use the upstream/downstream switches internal to dGPU /
6432	*speed = pcie_get_speed_cap(dev: parent);
6433	*width = pcie_get_width_cap(dev: parent);
6434	while ((parent = pci_upstream_bridge(dev: parent))) {
6435	if (parent->vendor == PCI_VENDOR_ID_ATI) {
6436	/ use the upstream/downstream switches internal to dGPU /
6437	*speed = pcie_get_speed_cap(dev: parent);
6438	*width = pcie_get_width_cap(dev: parent);
6439	}
6440	}
6441	} else {
6442	/ use the device itself /
6443	*speed = pcie_get_speed_cap(dev: adev->pdev);
6444	*width = pcie_get_width_cap(dev: adev->pdev);
6445	}
6446	}
6447
6448	/**
6449	* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
6450	*
6451	* @adev: amdgpu_device pointer
6452	*
6453	* Fetches and stores in the driver the PCIE capabilities (gen speed
6454	* and lanes) of the slot the device is in. Handles APUs and
6455	* virtualized environments where PCIE config space may not be available.
6456	*/
6457	static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
6458	{
6459	enum pci_bus_speed speed_cap, platform_speed_cap;
6460	enum pcie_link_width platform_link_width, link_width;
6461
6462	if (amdgpu_pcie_gen_cap)
6463	adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
6464
6465	if (amdgpu_pcie_lane_cap)
6466	adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
6467
6468	/ covers APUs as well /
6469	if (pci_is_root_bus(pbus: adev->pdev->bus) && !amdgpu_passthrough(adev)) {
6470	if (adev->pm.pcie_gen_mask == `0`)
6471	adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
6472	if (adev->pm.pcie_mlw_mask == `0`)
6473	adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
6474	return;
6475	}
6476
6477	if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
6478	return;
6479
6480	amdgpu_device_partner_bandwidth(adev, speed: &platform_speed_cap,
6481	width: &platform_link_width);
6482	amdgpu_device_gpu_bandwidth(adev, speed: &speed_cap, width: &link_width);
6483
6484	if (adev->pm.pcie_gen_mask == `0`) {
6485	/ asic caps /
6486	if (speed_cap == PCI_SPEED_UNKNOWN) {
6487	adev->pm.pcie_gen_mask \|= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6488	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6489	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6490	} else {
6491	if (speed_cap == PCIE_SPEED_32_0GT)
6492	adev->pm.pcie_gen_mask \|= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6493	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6494	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 \|
6495	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4 \|
6496	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN5);
6497	else if (speed_cap == PCIE_SPEED_16_0GT)
6498	adev->pm.pcie_gen_mask \|= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6499	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6500	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 \|
6501	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
6502	else if (speed_cap == PCIE_SPEED_8_0GT)
6503	adev->pm.pcie_gen_mask \|= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6504	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6505	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
6506	else if (speed_cap == PCIE_SPEED_5_0GT)
6507	adev->pm.pcie_gen_mask \|= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6508	CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
6509	else
6510	adev->pm.pcie_gen_mask \|= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
6511	}
6512	/ platform caps /
6513	if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
6514	adev->pm.pcie_gen_mask \|= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6515	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6516	} else {
6517	if (platform_speed_cap == PCIE_SPEED_32_0GT)
6518	adev->pm.pcie_gen_mask \|= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6519	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6520	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 \|
6521	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 \|
6522	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN5);
6523	else if (platform_speed_cap == PCIE_SPEED_16_0GT)
6524	adev->pm.pcie_gen_mask \|= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6525	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6526	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 \|
6527	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
6528	else if (platform_speed_cap == PCIE_SPEED_8_0GT)
6529	adev->pm.pcie_gen_mask \|= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6530	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 \|
6531	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
6532	else if (platform_speed_cap == PCIE_SPEED_5_0GT)
6533	adev->pm.pcie_gen_mask \|= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 \|
6534	CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
6535	else
6536	adev->pm.pcie_gen_mask \|= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
6537
6538	}
6539	}
6540	if (adev->pm.pcie_mlw_mask == `0`) {
6541	/ asic caps /
6542	if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6543	adev->pm.pcie_mlw_mask \|= AMDGPU_DEFAULT_ASIC_PCIE_MLW_MASK;
6544	} else {
6545	switch (link_width) {
6546	case PCIE_LNK_X32:
6547	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X32 \|
6548	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 \|
6549	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6550	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6551	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6552	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6553	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6554	break;
6555	case PCIE_LNK_X16:
6556	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X16 \|
6557	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6558	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6559	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6560	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6561	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6562	break;
6563	case PCIE_LNK_X12:
6564	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6565	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6566	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6567	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6568	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6569	break;
6570	case PCIE_LNK_X8:
6571	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6572	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6573	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6574	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6575	break;
6576	case PCIE_LNK_X4:
6577	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6578	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6579	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6580	break;
6581	case PCIE_LNK_X2:
6582	adev->pm.pcie_mlw_mask \|= (CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6583	CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1);
6584	break;
6585	case PCIE_LNK_X1:
6586	adev->pm.pcie_mlw_mask \|= CAIL_ASIC_PCIE_LINK_WIDTH_SUPPORT_X1;
6587	break;
6588	default:
6589	break;
6590	}
6591	}
6592	/ platform caps /
6593	if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
6594	adev->pm.pcie_mlw_mask \|= AMDGPU_DEFAULT_PCIE_MLW_MASK;
6595	} else {
6596	switch (platform_link_width) {
6597	case PCIE_LNK_X32:
6598	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 \|
6599	CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 \|
6600	CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6601	CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6602	CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6603	CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6604	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6605	break;
6606	case PCIE_LNK_X16:
6607	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 \|
6608	CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6609	CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6610	CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6611	CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6612	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6613	break;
6614	case PCIE_LNK_X12:
6615	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 \|
6616	CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6617	CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6618	CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6619	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6620	break;
6621	case PCIE_LNK_X8:
6622	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 \|
6623	CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6624	CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6625	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6626	break;
6627	case PCIE_LNK_X4:
6628	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 \|
6629	CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6630	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6631	break;
6632	case PCIE_LNK_X2:
6633	adev->pm.pcie_mlw_mask \|= (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 \|
6634	CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
6635	break;
6636	case PCIE_LNK_X1:
6637	adev->pm.pcie_mlw_mask \|= CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
6638	break;
6639	default:
6640	break;
6641	}
6642	}
6643	}
6644	}
6645
6646	/**
6647	* amdgpu_device_is_peer_accessible - Check peer access through PCIe BAR
6648	*
6649	* @adev: amdgpu_device pointer
6650	* @peer_adev: amdgpu_device pointer for peer device trying to access @adev
6651	*
6652	* Return true if @peer_adev can access (DMA) @adev through the PCIe
6653	* BAR, i.e. @adev is "large BAR" and the BAR matches the DMA mask of
6654	* @peer_adev.
6655	*/
6656	bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
6657	struct amdgpu_device *peer_adev)
6658	{
6659	#ifdef CONFIG_HSA_AMD_P2P
6660	bool p2p_access =
6661	!adev->gmc.xgmi.connected_to_cpu &&
6662	!(pci_p2pdma_distance(provider: adev->pdev, client: peer_adev->dev, verbose: false) < `0`);
6663	if (!p2p_access)
6664	dev_info(adev->dev, "PCIe P2P access from peer device %s is not supported by the chipset\n",
6665	pci_name(peer_adev->pdev));
6666
6667	bool is_large_bar = adev->gmc.visible_vram_size &&
6668	adev->gmc.real_vram_size == adev->gmc.visible_vram_size;
6669	bool p2p_addressable = amdgpu_device_check_iommu_remap(adev: peer_adev);
6670
6671	if (!p2p_addressable) {
6672	uint64_t address_mask = peer_adev->dev->dma_mask ?
6673	~*peer_adev->dev->dma_mask : ~((`1ULL` << `32`) - `1`);
6674	resource_size_t aper_limit =
6675	adev->gmc.aper_base + adev->gmc.aper_size - `1`;
6676
6677	p2p_addressable = !(adev->gmc.aper_base & address_mask \|\|
6678	aper_limit & address_mask);
6679	}
6680	return pcie_p2p && is_large_bar && p2p_access && p2p_addressable;
6681	#else
6682	return false;
6683	#endif
6684	}
6685
6686	int amdgpu_device_baco_enter(struct drm_device *dev)
6687	{
6688	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
6689	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6690
6691	if (!amdgpu_device_supports_baco(dev))
6692	return -ENOTSUPP;
6693
6694	if (ras && adev->ras_enabled &&
6695	adev->nbio.funcs->enable_doorbell_interrupt)
6696	adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
6697
6698	return amdgpu_dpm_baco_enter(adev);
6699	}
6700
6701	int amdgpu_device_baco_exit(struct drm_device *dev)
6702	{
6703	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
6704	struct amdgpu_ras *ras = amdgpu_ras_get_context(adev);
6705	int ret = `0`;
6706
6707	if (!amdgpu_device_supports_baco(dev))
6708	return -ENOTSUPP;
6709
6710	ret = amdgpu_dpm_baco_exit(adev);
6711	if (ret)
6712	return ret;
6713
6714	if (ras && adev->ras_enabled &&
6715	adev->nbio.funcs->enable_doorbell_interrupt)
6716	adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
6717
6718	if (amdgpu_passthrough(adev) && adev->nbio.funcs &&
6719	adev->nbio.funcs->clear_doorbell_interrupt)
6720	adev->nbio.funcs->clear_doorbell_interrupt(adev);
6721
6722	return `0`;
6723	}
6724
6725	/**
6726	* amdgpu_pci_error_detected - Called when a PCI error is detected.
6727	* @pdev: PCI device struct
6728	* @state: PCI channel state
6729	*
6730	* Description: Called when a PCI error is detected.
6731	*
6732	* Return: PCI_ERS_RESULT_NEED_RESET or PCI_ERS_RESULT_DISCONNECT.
6733	*/
6734	pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
6735	{
6736	struct drm_device *dev = pci_get_drvdata(pdev);
6737	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
6738	struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
6739	struct amdgpu_reset_context reset_context;
6740	struct list_head device_list;
6741	int r = `0`;
6742
6743	dev_info(adev->dev, "PCI error: detected callback!!\n");
6744
6745	if (!amdgpu_dpm_is_link_reset_supported(adev)) {
6746	dev_warn(adev->dev, "No support for XGMI hive yet...\n");
6747	return PCI_ERS_RESULT_DISCONNECT;
6748	}
6749
6750	adev->pci_channel_state = state;
6751
6752	switch (state) {
6753	case pci_channel_io_normal:
6754	dev_info(adev->dev, "pci_channel_io_normal: state(%d)!!\n", state);
6755	return PCI_ERS_RESULT_CAN_RECOVER;
6756	case pci_channel_io_frozen:
6757	/ Fatal error, prepare for slot reset /
6758	dev_info(adev->dev, "pci_channel_io_frozen: state(%d)!!\n", state);
6759
6760	if (hive)
6761	mutex_lock(&hive->hive_lock);
6762	adev->pcie_reset_ctx.occurs_dpc = true;
6763	memset(&reset_context, `0`, sizeof(reset_context));
6764	INIT_LIST_HEAD(list: &device_list);
6765
6766	r = amdgpu_device_halt_activities(adev, NULL, reset_context: &reset_context, device_list: &device_list,
6767	hive, need_emergency_restart: false);
6768	if (hive) {
6769	mutex_unlock(lock: &hive->hive_lock);
6770	amdgpu_put_xgmi_hive(hive);
6771	}
6772	if (r)
6773	return PCI_ERS_RESULT_DISCONNECT;
6774	return PCI_ERS_RESULT_NEED_RESET;
6775	case pci_channel_io_perm_failure:
6776	/ Permanent error, prepare for device removal /
6777	dev_info(adev->dev, "pci_channel_io_perm_failure: state(%d)!!\n", state);
6778	return PCI_ERS_RESULT_DISCONNECT;
6779	}
6780
6781	return PCI_ERS_RESULT_NEED_RESET;
6782	}
6783
6784	/**
6785	* amdgpu_pci_mmio_enabled - Enable MMIO and dump debug registers
6786	* @pdev: pointer to PCI device
6787	*/
6788	pci_ers_result_t amdgpu_pci_mmio_enabled(struct pci_dev *pdev)
6789	{
6790	struct drm_device *dev = pci_get_drvdata(pdev);
6791	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
6792
6793	dev_info(adev->dev, "PCI error: mmio enabled callback!!\n");
6794
6795	/ TODO - dump whatever for debugging purposes /
6796
6797	/ This called only if amdgpu_pci_error_detected returns*
6798	* PCI_ERS_RESULT_CAN_RECOVER. Read/write to the device still
6799	* works, no need to reset slot.
6800	*/
6801
6802	return PCI_ERS_RESULT_RECOVERED;
6803	}
6804
6805	/**
6806	* amdgpu_pci_slot_reset - Called when PCI slot has been reset.
6807	* @pdev: PCI device struct
6808	*
6809	* Description: This routine is called by the pci error recovery
6810	* code after the PCI slot has been reset, just before we
6811	* should resume normal operations.
6812	*/
6813	pci_ers_result_t amdgpu_pci_slot_reset(struct pci_dev *pdev)
6814	{
6815	struct drm_device *dev = pci_get_drvdata(pdev);
6816	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
6817	struct amdgpu_reset_context reset_context;
6818	struct amdgpu_device *tmp_adev;
6819	struct amdgpu_hive_info *hive;
6820	struct list_head device_list;
6821	int r = `0`, i;
6822	u32 memsize;
6823
6824	/ PCI error slot reset should be skipped During RAS recovery /
6825	if ((amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `3`) \|\|
6826	amdgpu_ip_version(adev, ip: GC_HWIP, inst: `0`) == IP_VERSION(`9`, `4`, `4`)) &&
6827	amdgpu_ras_in_recovery(adev))
6828	return PCI_ERS_RESULT_RECOVERED;
6829
6830	dev_info(adev->dev, "PCI error: slot reset callback!!\n");
6831
6832	memset(&reset_context, `0`, sizeof(reset_context));
6833
6834	/ wait for asic to come out of reset /
6835	msleep(msecs: `700`);
6836
6837	/ Restore PCI confspace /
6838	amdgpu_device_load_pci_state(pdev);
6839
6840	/ confirm ASIC came out of reset /
6841	for (i = `0`; i < adev->usec_timeout; i++) {
6842	memsize = amdgpu_asic_get_config_memsize(adev);
6843
6844	if (memsize != `0xffffffff`)
6845	break;
6846	udelay(usec: `1`);
6847	}
6848	if (memsize == `0xffffffff`) {
6849	r = -ETIME;
6850	goto out;
6851	}
6852
6853	reset_context.method = AMD_RESET_METHOD_NONE;
6854	reset_context.reset_req_dev = adev;
6855	set_bit(nr: AMDGPU_NEED_FULL_RESET, addr: &reset_context.flags);
6856	set_bit(nr: AMDGPU_SKIP_COREDUMP, addr: &reset_context.flags);
6857	INIT_LIST_HEAD(list: &device_list);
6858
6859	hive = amdgpu_get_xgmi_hive(adev);
6860	if (hive) {
6861	mutex_lock(&hive->hive_lock);
6862	reset_context.hive = hive;
6863	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6864	tmp_adev->pcie_reset_ctx.in_link_reset = true;
6865	list_add_tail(new: &tmp_adev->reset_list, head: &device_list);
6866	}
6867	} else {
6868	set_bit(nr: AMDGPU_SKIP_HW_RESET, addr: &reset_context.flags);
6869	list_add_tail(new: &adev->reset_list, head: &device_list);
6870	}
6871
6872	r = amdgpu_device_asic_reset(adev, device_list: &device_list, reset_context: &reset_context);
6873	out:
6874	if (!r) {
6875	if (amdgpu_device_cache_pci_state(pdev: adev->pdev))
6876	pci_restore_state(dev: adev->pdev);
6877	dev_info(adev->dev, "PCIe error recovery succeeded\n");
6878	} else {
6879	dev_err(adev->dev, "PCIe error recovery failed, err:%d\n", r);
6880	if (hive) {
6881	list_for_each_entry(tmp_adev, &device_list, reset_list)
6882	amdgpu_device_unset_mp1_state(adev: tmp_adev);
6883	amdgpu_device_unlock_reset_domain(reset_domain: adev->reset_domain);
6884	}
6885	}
6886
6887	if (hive) {
6888	mutex_unlock(lock: &hive->hive_lock);
6889	amdgpu_put_xgmi_hive(hive);
6890	}
6891
6892	return r ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED;
6893	}
6894
6895	/**
6896	* amdgpu_pci_resume() - resume normal ops after PCI reset
6897	* @pdev: pointer to PCI device
6898	*
6899	* Called when the error recovery driver tells us that its
6900	* OK to resume normal operation.
6901	*/
6902	void amdgpu_pci_resume(struct pci_dev *pdev)
6903	{
6904	struct drm_device *dev = pci_get_drvdata(pdev);
6905	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
6906	struct list_head device_list;
6907	struct amdgpu_hive_info *hive = NULL;
6908	struct amdgpu_device *tmp_adev = NULL;
6909
6910	dev_info(adev->dev, "PCI error: resume callback!!\n");
6911
6912	/ Only continue execution for the case of pci_channel_io_frozen /
6913	if (adev->pci_channel_state != pci_channel_io_frozen)
6914	return;
6915
6916	INIT_LIST_HEAD(list: &device_list);
6917
6918	hive = amdgpu_get_xgmi_hive(adev);
6919	if (hive) {
6920	mutex_lock(&hive->hive_lock);
6921	list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
6922	tmp_adev->pcie_reset_ctx.in_link_reset = false;
6923	list_add_tail(new: &tmp_adev->reset_list, head: &device_list);
6924	}
6925	} else
6926	list_add_tail(new: &adev->reset_list, head: &device_list);
6927
6928	amdgpu_device_sched_resume(device_list: &device_list, NULL, NULL);
6929	amdgpu_device_gpu_resume(adev, device_list: &device_list, need_emergency_restart: false);
6930	adev->pcie_reset_ctx.occurs_dpc = false;
6931
6932	if (hive) {
6933	mutex_unlock(lock: &hive->hive_lock);
6934	amdgpu_put_xgmi_hive(hive);
6935	}
6936	}
6937
6938	bool amdgpu_device_cache_pci_state(struct pci_dev *pdev)
6939	{
6940	struct drm_device *dev = pci_get_drvdata(pdev);
6941	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
6942	int r;
6943
6944	if (amdgpu_sriov_vf(adev))
6945	return false;
6946
6947	r = pci_save_state(dev: pdev);
6948	if (!r) {
6949	kfree(objp: adev->pci_state);
6950
6951	adev->pci_state = pci_store_saved_state(dev: pdev);
6952
6953	if (!adev->pci_state) {
6954	DRM_ERROR("Failed to store PCI saved state");
6955	return false;
6956	}
6957	} else {
6958	DRM_WARN("Failed to save PCI state, err:%d\n", r);
6959	return false;
6960	}
6961
6962	return true;
6963	}
6964
6965	bool amdgpu_device_load_pci_state(struct pci_dev *pdev)
6966	{
6967	struct drm_device *dev = pci_get_drvdata(pdev);
6968	struct amdgpu_device *adev = drm_to_adev(ddev: dev);
6969	int r;
6970
6971	if (!adev->pci_state)
6972	return false;
6973
6974	r = pci_load_saved_state(dev: pdev, state: adev->pci_state);
6975
6976	if (!r) {
6977	pci_restore_state(dev: pdev);
6978	} else {
6979	DRM_WARN("Failed to load PCI state, err:%d\n", r);
6980	return false;
6981	}
6982
6983	return true;
6984	}
6985
6986	void amdgpu_device_flush_hdp(struct amdgpu_device *adev,
6987	struct amdgpu_ring *ring)
6988	{
6989	#ifdef CONFIG_X86_64
6990	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
6991	return;
6992	#endif
6993	if (adev->gmc.xgmi.connected_to_cpu)
6994	return;
6995
6996	if (ring && ring->funcs->emit_hdp_flush)
6997	amdgpu_ring_emit_hdp_flush(ring);
6998	else
6999	amdgpu_asic_flush_hdp(adev, ring);
7000	}
7001
7002	void amdgpu_device_invalidate_hdp(struct amdgpu_device *adev,
7003	struct amdgpu_ring *ring)
7004	{
7005	#ifdef CONFIG_X86_64
7006	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev))
7007	return;
7008	#endif
7009	if (adev->gmc.xgmi.connected_to_cpu)
7010	return;
7011
7012	amdgpu_asic_invalidate_hdp(adev, ring);
7013	}
7014
7015	int amdgpu_in_reset(struct amdgpu_device *adev)
7016	{
7017	return atomic_read(v: &adev->reset_domain->in_gpu_reset);
7018	}
7019
7020	/**
7021	* amdgpu_device_halt() - bring hardware to some kind of halt state
7022	*
7023	* @adev: amdgpu_device pointer
7024	*
7025	* Bring hardware to some kind of halt state so that no one can touch it
7026	* any more. It will help to maintain error context when error occurred.
7027	* Compare to a simple hang, the system will keep stable at least for SSH
7028	* access. Then it should be trivial to inspect the hardware state and
7029	* see what's going on. Implemented as following:
7030	*
7031	* 1. drm_dev_unplug() makes device inaccessible to user space(IOCTLs, etc),
7032	* clears all CPU mappings to device, disallows remappings through page faults
7033	* 2. amdgpu_irq_disable_all() disables all interrupts
7034	* 3. amdgpu_fence_driver_hw_fini() signals all HW fences
7035	* 4. set adev->no_hw_access to avoid potential crashes after setp 5
7036	* 5. amdgpu_device_unmap_mmio() clears all MMIO mappings
7037	* 6. pci_disable_device() and pci_wait_for_pending_transaction()
7038	* flush any in flight DMA operations
7039	*/
7040	void amdgpu_device_halt(struct amdgpu_device *adev)
7041	{
7042	struct pci_dev *pdev = adev->pdev;
7043	struct drm_device *ddev = adev_to_drm(adev);
7044
7045	amdgpu_xcp_dev_unplug(adev);
7046	drm_dev_unplug(dev: ddev);
7047
7048	amdgpu_irq_disable_all(adev);
7049
7050	amdgpu_fence_driver_hw_fini(adev);
7051
7052	adev->no_hw_access = true;
7053
7054	amdgpu_device_unmap_mmio(adev);
7055
7056	pci_disable_device(dev: pdev);
7057	pci_wait_for_pending_transaction(dev: pdev);
7058	}
7059
7060	u32 amdgpu_device_pcie_port_rreg(struct amdgpu_device *adev,
7061	u32 reg)
7062	{
7063	unsigned long flags, address, data;
7064	u32 r;
7065
7066	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
7067	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
7068
7069	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
7070	WREG32(address, reg * `4`);
7071	(void)RREG32(address);
7072	r = RREG32(data);
7073	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
7074	return r;
7075	}
7076
7077	void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
7078	u32 reg, u32 v)
7079	{
7080	unsigned long flags, address, data;
7081
7082	address = adev->nbio.funcs->get_pcie_port_index_offset(adev);
7083	data = adev->nbio.funcs->get_pcie_port_data_offset(adev);
7084
7085	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
7086	WREG32(address, reg * `4`);
7087	(void)RREG32(address);
7088	WREG32(data, v);
7089	(void)RREG32(data);
7090	spin_unlock_irqrestore(lock: &adev->pcie_idx_lock, flags);
7091	}
7092
7093	/**
7094	* amdgpu_device_get_gang - return a reference to the current gang
7095	* @adev: amdgpu_device pointer
7096	*
7097	* Returns: A new reference to the current gang leader.
7098	*/
7099	struct dma_fence amdgpu_device_get_gang(struct* amdgpu_device *adev)
7100	{
7101	struct dma_fence *fence;
7102
7103	rcu_read_lock();
7104	fence = dma_fence_get_rcu_safe(fencep: &adev->gang_submit);
7105	rcu_read_unlock();
7106	return fence;
7107	}
7108
7109	/**
7110	* amdgpu_device_switch_gang - switch to a new gang
7111	* @adev: amdgpu_device pointer
7112	* @gang: the gang to switch to
7113	*
7114	* Try to switch to a new gang.
7115	* Returns: NULL if we switched to the new gang or a reference to the current
7116	* gang leader.
7117	*/
7118	struct dma_fence amdgpu_device_switch_gang(struct* amdgpu_device *adev,
7119	struct dma_fence *gang)
7120	{
7121	struct dma_fence *old = NULL;
7122
7123	dma_fence_get(fence: gang);
7124	do {
7125	dma_fence_put(fence: old);
7126	old = amdgpu_device_get_gang(adev);
7127	if (old == gang)
7128	break;
7129
7130	if (!dma_fence_is_signaled(fence: old)) {
7131	dma_fence_put(fence: gang);
7132	return old;
7133	}
7134
7135	} while (cmpxchg((struct dma_fence __force **)&adev->gang_submit,
7136	old, gang) != old);
7137
7138	/*
7139	* Drop it once for the exchanged reference in adev and once for the
7140	* thread local reference acquired in amdgpu_device_get_gang().
7141	*/
7142	dma_fence_put(fence: old);
7143	dma_fence_put(fence: old);
7144	return NULL;
7145	}
7146
7147	/**
7148	* amdgpu_device_enforce_isolation - enforce HW isolation
7149	* @adev: the amdgpu device pointer
7150	* @ring: the HW ring the job is supposed to run on
7151	* @job: the job which is about to be pushed to the HW ring
7152	*
7153	* Makes sure that only one client at a time can use the GFX block.
7154	* Returns: The dependency to wait on before the job can be pushed to the HW.
7155	* The function is called multiple times until NULL is returned.
7156	*/
7157	struct dma_fence amdgpu_device_enforce_isolation(struct* amdgpu_device *adev,
7158	struct amdgpu_ring *ring,
7159	struct amdgpu_job *job)
7160	{
7161	struct amdgpu_isolation *isolation = &adev->isolation[ring->xcp_id];
7162	struct drm_sched_fence *f = job->base.s_fence;
7163	struct dma_fence *dep;
7164	void *owner;
7165	int r;
7166
7167	/*
7168	* For now enforce isolation only for the GFX block since we only need
7169	* the cleaner shader on those rings.
7170	*/
7171	if (ring->funcs->type != AMDGPU_RING_TYPE_GFX &&
7172	ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE)
7173	return NULL;
7174
7175	/*
7176	* All submissions where enforce isolation is false are handled as if
7177	* they come from a single client. Use ~0l as the owner to distinct it
7178	* from kernel submissions where the owner is NULL.
7179	*/
7180	owner = job->enforce_isolation ? f->owner : (void *)~`0l`;
7181
7182	mutex_lock(&adev->enforce_isolation_mutex);
7183
7184	/*
7185	* The "spearhead" submission is the first one which changes the
7186	* ownership to its client. We always need to wait for it to be
7187	* pushed to the HW before proceeding with anything.
7188	*/
7189	if (&f->scheduled != isolation->spearhead &&
7190	!dma_fence_is_signaled(fence: isolation->spearhead)) {
7191	dep = isolation->spearhead;
7192	goto out_grab_ref;
7193	}
7194
7195	if (isolation->owner != owner) {
7196
7197	/*
7198	* Wait for any gang to be assembled before switching to a
7199	* different owner or otherwise we could deadlock the
7200	* submissions.
7201	*/
7202	if (!job->gang_submit) {
7203	dep = amdgpu_device_get_gang(adev);
7204	if (!dma_fence_is_signaled(fence: dep))
7205	goto out_return_dep;
7206	dma_fence_put(fence: dep);
7207	}
7208
7209	dma_fence_put(fence: isolation->spearhead);
7210	isolation->spearhead = dma_fence_get(fence: &f->scheduled);
7211	amdgpu_sync_move(src: &isolation->active, dst: &isolation->prev);
7212	trace_amdgpu_isolation(prev: isolation->owner, next: owner);
7213	isolation->owner = owner;
7214	}
7215
7216	/*
7217	* Specifying the ring here helps to pipeline submissions even when
7218	* isolation is enabled. If that is not desired for testing NULL can be
7219	* used instead of the ring to enforce a CPU round trip while switching
7220	* between clients.
7221	*/
7222	dep = amdgpu_sync_peek_fence(sync: &isolation->prev, ring);
7223	r = amdgpu_sync_fence(sync: &isolation->active, f: &f->finished, GFP_NOWAIT);
7224	if (r)
7225	DRM_WARN("OOM tracking isolation\n");
7226
7227	out_grab_ref:
7228	dma_fence_get(fence: dep);
7229	out_return_dep:
7230	mutex_unlock(lock: &adev->enforce_isolation_mutex);
7231	return dep;
7232	}
7233
7234	bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
7235	{
7236	switch (adev->asic_type) {
7237	#ifdef CONFIG_DRM_AMDGPU_SI
7238	case CHIP_HAINAN:
7239	#endif
7240	case CHIP_TOPAZ:
7241	/ chips with no display hardware /
7242	return false;
7243	#ifdef CONFIG_DRM_AMDGPU_SI
7244	case CHIP_TAHITI:
7245	case CHIP_PITCAIRN:
7246	case CHIP_VERDE:
7247	case CHIP_OLAND:
7248	#endif
7249	#ifdef CONFIG_DRM_AMDGPU_CIK
7250	case CHIP_BONAIRE:
7251	case CHIP_HAWAII:
7252	case CHIP_KAVERI:
7253	case CHIP_KABINI:
7254	case CHIP_MULLINS:
7255	#endif
7256	case CHIP_TONGA:
7257	case CHIP_FIJI:
7258	case CHIP_POLARIS10:
7259	case CHIP_POLARIS11:
7260	case CHIP_POLARIS12:
7261	case CHIP_VEGAM:
7262	case CHIP_CARRIZO:
7263	case CHIP_STONEY:
7264	/ chips with display hardware /
7265	return true;
7266	default:
7267	/ IP discovery /
7268	if (!amdgpu_ip_version(adev, ip: DCE_HWIP, inst: `0`) \|\|
7269	(adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
7270	return false;
7271	return true;
7272	}
7273	}
7274
7275	uint32_t amdgpu_device_wait_on_rreg(struct amdgpu_device *adev,
7276	uint32_t inst, uint32_t reg_addr, char reg_name[],
7277	uint32_t expected_value, uint32_t mask)
7278	{
7279	uint32_t ret = `0`;
7280	uint32_t old_ = `0`;
7281	uint32_t tmp_ = RREG32(reg_addr);
7282	uint32_t loop = adev->usec_timeout;
7283
7284	while ((tmp_ & (mask)) != (expected_value)) {
7285	if (old_ != tmp_) {
7286	loop = adev->usec_timeout;
7287	old_ = tmp_;
7288	} else
7289	udelay(usec: `1`);
7290	tmp_ = RREG32(reg_addr);
7291	loop--;
7292	if (!loop) {
7293	DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08xn",
7294	inst, reg_name, (uint32_t)expected_value,
7295	(uint32_t)(tmp_ & (mask)));
7296	ret = -ETIMEDOUT;
7297	break;
7298	}
7299	}
7300	return ret;
7301	}
7302
7303	ssize_t amdgpu_get_soft_full_reset_mask(struct amdgpu_ring *ring)
7304	{
7305	ssize_t size = `0`;
7306
7307	if (!ring \|\| !ring->adev)
7308	return size;
7309
7310	if (amdgpu_device_should_recover_gpu(adev: ring->adev))
7311	size \|= AMDGPU_RESET_TYPE_FULL;
7312
7313	if (unlikely(!ring->adev->debug_disable_soft_recovery) &&
7314	!amdgpu_sriov_vf(ring->adev) && ring->funcs->soft_recovery)
7315	size \|= AMDGPU_RESET_TYPE_SOFT_RESET;
7316
7317	return size;
7318	}
7319
7320	ssize_t amdgpu_show_reset_mask(char *buf, uint32_t supported_reset)
7321	{
7322	ssize_t size = `0`;
7323
7324	if (supported_reset == `0`) {
7325	size += sysfs_emit_at(buf, at: size, fmt: "unsupported");
7326	size += sysfs_emit_at(buf, at: size, fmt: "\n");
7327	return size;
7328
7329	}
7330
7331	if (supported_reset & AMDGPU_RESET_TYPE_SOFT_RESET)
7332	size += sysfs_emit_at(buf, at: size, fmt: "soft ");
7333
7334	if (supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
7335	size += sysfs_emit_at(buf, at: size, fmt: "queue ");
7336
7337	if (supported_reset & AMDGPU_RESET_TYPE_PER_PIPE)
7338	size += sysfs_emit_at(buf, at: size, fmt: "pipe ");
7339
7340	if (supported_reset & AMDGPU_RESET_TYPE_FULL)
7341	size += sysfs_emit_at(buf, at: size, fmt: "full ");
7342
7343	size += sysfs_emit_at(buf, at: size, fmt: "\n");
7344	return size;
7345	}
7346

Provided by KDAB

Definitions

amdgpu_kms_driver
amdgpu_asic_name
amdgpu_init_default
amdgpu_init_recovery
amdgpu_init_minimal_xgmi
amdgpu_ip_member_of_hwini
amdgpu_set_init_level
amdgpu_device_get_pcie_replay_count
amdgpu_device_attr_sysfs_init
amdgpu_device_attr_sysfs_fini
amdgpu_sysfs_reg_state_get
amdgpu_reg_state_sysfs_init
amdgpu_reg_state_sysfs_fini
amdgpu_ip_block_suspend
amdgpu_ip_block_resume
amdgpu_device_get_board_info
amdgpu_board_attrs
amdgpu_board_attrs_is_visible
amdgpu_board_attrs_group
amdgpu_device_supports_px
amdgpu_device_supports_boco
amdgpu_device_supports_baco
amdgpu_device_detect_runtime_pm_mode
amdgpu_device_supports_smart_shift
amdgpu_device_mm_access
amdgpu_device_aper_access
amdgpu_device_vram_access
amdgpu_device_skip_hw_access
amdgpu_device_rreg
amdgpu_mm_rreg8
amdgpu_device_xcc_rreg
amdgpu_mm_wreg8
amdgpu_device_wreg
amdgpu_mm_wreg_mmio_rlc
amdgpu_device_xcc_wreg
amdgpu_device_indirect_rreg
amdgpu_device_indirect_rreg_ext
amdgpu_device_indirect_rreg64
amdgpu_device_indirect_rreg64_ext
amdgpu_device_indirect_wreg
amdgpu_device_indirect_wreg_ext
amdgpu_device_indirect_wreg64
amdgpu_device_indirect_wreg64_ext
amdgpu_device_get_rev_id
amdgpu_invalid_rreg
amdgpu_invalid_rreg_ext
amdgpu_invalid_wreg
amdgpu_invalid_wreg_ext
amdgpu_invalid_rreg64
amdgpu_invalid_rreg64_ext
amdgpu_invalid_wreg64
amdgpu_invalid_wreg64_ext
amdgpu_block_invalid_rreg
amdgpu_block_invalid_wreg
amdgpu_device_get_vbios_flags
amdgpu_device_asic_init
amdgpu_device_mem_scratch_init
amdgpu_device_mem_scratch_fini
amdgpu_device_program_register_sequence
amdgpu_device_pci_config_reset
amdgpu_device_pci_reset
amdgpu_device_wb_fini
amdgpu_device_wb_init
amdgpu_device_wb_get
amdgpu_device_wb_free
amdgpu_device_resize_fb_bar
amdgpu_device_need_post
amdgpu_device_seamless_boot_supported
amdgpu_device_pcie_dynamic_switching_supported
amdgpu_device_aspm_support_quirk
amdgpu_device_should_use_aspm
amdgpu_device_vga_set_decode
amdgpu_device_check_block_size
amdgpu_device_check_vm_size
amdgpu_device_check_smu_prv_buffer_size
amdgpu_device_init_apu_flags
amdgpu_device_check_arguments
amdgpu_switcheroo_set_state
amdgpu_switcheroo_can_switch
amdgpu_switcheroo_ops
amdgpu_device_ip_set_clockgating_state
amdgpu_device_ip_set_powergating_state
amdgpu_device_ip_get_clockgating_state
amdgpu_device_ip_wait_for_idle
amdgpu_device_ip_is_valid
amdgpu_device_ip_get_ip_block
amdgpu_device_ip_block_version_cmp
amdgpu_device_ip_block_add
amdgpu_device_enable_virtual_display
amdgpu_device_set_sriov_virtual_display
amdgpu_device_parse_gpu_info_fw
amdgpu_device_ip_early_init
amdgpu_device_ip_hw_init_phase1
amdgpu_device_ip_hw_init_phase2
amdgpu_device_fw_loading
amdgpu_device_init_schedulers
amdgpu_device_ip_init
amdgpu_device_fill_reset_magic
amdgpu_device_check_vram_lost
amdgpu_device_set_cg_state
amdgpu_device_set_pg_state
amdgpu_device_enable_mgpu_fan_boost
amdgpu_device_ip_late_init
amdgpu_ip_block_hw_fini
amdgpu_device_smu_fini_early
amdgpu_device_ip_fini_early
amdgpu_device_ip_fini
amdgpu_device_delayed_init_work_handler
amdgpu_device_delay_enable_gfx_off
amdgpu_device_ip_suspend_phase1
amdgpu_device_ip_suspend_phase2
amdgpu_device_ip_suspend
amdgpu_device_ip_reinit_early_sriov
amdgpu_device_ip_reinit_late_sriov
amdgpu_device_ip_resume_phase1
amdgpu_device_ip_resume_phase2
amdgpu_device_ip_resume_phase3
amdgpu_device_ip_resume
amdgpu_device_detect_sriov_bios
amdgpu_device_asic_has_dc_support
amdgpu_device_has_dc_support
amdgpu_device_xgmi_reset_func
amdgpu_device_get_job_timeout_settings
amdgpu_device_check_iommu_direct_map
amdgpu_device_check_iommu_remap
amdgpu_device_set_mcbp
amdgpu_device_init
amdgpu_device_unmap_mmio
amdgpu_device_fini_hw
amdgpu_device_fini_sw
amdgpu_device_evict_resources
amdgpu_device_pm_notifier
amdgpu_device_prepare
amdgpu_device_suspend
amdgpu_device_resume
amdgpu_device_ip_check_soft_reset
amdgpu_device_ip_pre_soft_reset
amdgpu_device_ip_need_full_reset
amdgpu_device_ip_soft_reset
amdgpu_device_ip_post_soft_reset
amdgpu_device_reset_sriov
amdgpu_device_has_job_running
amdgpu_device_should_recover_gpu
amdgpu_device_mode1_reset
amdgpu_device_link_reset
amdgpu_device_pre_asic_reset
amdgpu_device_reinit_after_reset
amdgpu_do_asic_reset
amdgpu_device_set_mp1_state
amdgpu_device_unset_mp1_state
amdgpu_device_resume_display_audio
amdgpu_device_suspend_display_audio
amdgpu_device_stop_pending_resets
amdgpu_device_health_check
amdgpu_device_halt_activities
amdgpu_device_asic_reset
amdgpu_device_sched_resume
amdgpu_device_gpu_resume
amdgpu_device_gpu_recover
amdgpu_device_partner_bandwidth
amdgpu_device_gpu_bandwidth
amdgpu_device_get_pcie_info
amdgpu_device_is_peer_accessible
amdgpu_device_baco_enter
amdgpu_device_baco_exit
amdgpu_pci_error_detected
amdgpu_pci_mmio_enabled
amdgpu_pci_slot_reset
amdgpu_pci_resume
amdgpu_device_cache_pci_state
amdgpu_device_load_pci_state
amdgpu_device_flush_hdp
amdgpu_device_invalidate_hdp
amdgpu_in_reset
amdgpu_device_halt
amdgpu_device_pcie_port_rreg
amdgpu_device_pcie_port_wreg
amdgpu_device_get_gang
amdgpu_device_switch_gang
amdgpu_device_enforce_isolation
amdgpu_device_has_display_hardware
amdgpu_device_wait_on_rreg
amdgpu_get_soft_full_reset_mask

Improve your Profiling and Debugging skills

Find out more

Definitions

source code of linux/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c