1 | // SPDX-License-Identifier: GPL-2.0 OR MIT |
2 | /* |
3 | * Copyright 2014-2022 Advanced Micro Devices, Inc. |
4 | * |
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
6 | * copy of this software and associated documentation files (the "Software"), |
7 | * to deal in the Software without restriction, including without limitation |
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
9 | * and/or sell copies of the Software, and to permit persons to whom the |
10 | * Software is furnished to do so, subject to the following conditions: |
11 | * |
12 | * The above copyright notice and this permission notice shall be included in |
13 | * all copies or substantial portions of the Software. |
14 | * |
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
18 | * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR |
19 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
20 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
21 | * OTHER DEALINGS IN THE SOFTWARE. |
22 | * |
23 | */ |
24 | |
25 | #include "kfd_device_queue_manager.h" |
26 | #include "gca/gfx_8_0_enum.h" |
27 | #include "gca/gfx_8_0_sh_mask.h" |
28 | #include "oss/oss_3_0_sh_mask.h" |
29 | |
30 | static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, |
31 | struct qcm_process_device *qpd, |
32 | enum cache_policy default_policy, |
33 | enum cache_policy alternate_policy, |
34 | void __user *alternate_aperture_base, |
35 | uint64_t alternate_aperture_size); |
36 | static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, |
37 | struct qcm_process_device *qpd, |
38 | enum cache_policy default_policy, |
39 | enum cache_policy alternate_policy, |
40 | void __user *alternate_aperture_base, |
41 | uint64_t alternate_aperture_size); |
42 | static int update_qpd_vi(struct device_queue_manager *dqm, |
43 | struct qcm_process_device *qpd); |
44 | static int update_qpd_vi_tonga(struct device_queue_manager *dqm, |
45 | struct qcm_process_device *qpd); |
46 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, |
47 | struct qcm_process_device *qpd); |
48 | static void init_sdma_vm_tonga(struct device_queue_manager *dqm, |
49 | struct queue *q, |
50 | struct qcm_process_device *qpd); |
51 | |
52 | void device_queue_manager_init_vi( |
53 | struct device_queue_manager_asic_ops *asic_ops) |
54 | { |
55 | asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi; |
56 | asic_ops->update_qpd = update_qpd_vi; |
57 | asic_ops->init_sdma_vm = init_sdma_vm; |
58 | asic_ops->mqd_manager_init = mqd_manager_init_vi; |
59 | } |
60 | |
61 | void device_queue_manager_init_vi_tonga( |
62 | struct device_queue_manager_asic_ops *asic_ops) |
63 | { |
64 | asic_ops->set_cache_memory_policy = set_cache_memory_policy_vi_tonga; |
65 | asic_ops->update_qpd = update_qpd_vi_tonga; |
66 | asic_ops->init_sdma_vm = init_sdma_vm_tonga; |
67 | asic_ops->mqd_manager_init = mqd_manager_init_vi_tonga; |
68 | } |
69 | |
70 | static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble) |
71 | { |
72 | /* In 64-bit mode, we can only control the top 3 bits of the LDS, |
73 | * scratch and GPUVM apertures. |
74 | * The hardware fills in the remaining 59 bits according to the |
75 | * following pattern: |
76 | * LDS: X0000000'00000000 - X0000001'00000000 (4GB) |
77 | * Scratch: X0000001'00000000 - X0000002'00000000 (4GB) |
78 | * GPUVM: Y0010000'00000000 - Y0020000'00000000 (1TB) |
79 | * |
80 | * (where X/Y is the configurable nybble with the low-bit 0) |
81 | * |
82 | * LDS and scratch will have the same top nybble programmed in the |
83 | * top 3 bits of SH_MEM_BASES.PRIVATE_BASE. |
84 | * GPUVM can have a different top nybble programmed in the |
85 | * top 3 bits of SH_MEM_BASES.SHARED_BASE. |
86 | * We don't bother to support different top nybbles |
87 | * for LDS/Scratch and GPUVM. |
88 | */ |
89 | |
90 | WARN_ON((top_address_nybble & 1) || top_address_nybble > 0xE || |
91 | top_address_nybble == 0); |
92 | |
93 | return top_address_nybble << 12 | |
94 | (top_address_nybble << 12) << |
95 | SH_MEM_BASES__SHARED_BASE__SHIFT; |
96 | } |
97 | |
98 | static bool set_cache_memory_policy_vi(struct device_queue_manager *dqm, |
99 | struct qcm_process_device *qpd, |
100 | enum cache_policy default_policy, |
101 | enum cache_policy alternate_policy, |
102 | void __user *alternate_aperture_base, |
103 | uint64_t alternate_aperture_size) |
104 | { |
105 | uint32_t default_mtype; |
106 | uint32_t ape1_mtype; |
107 | |
108 | default_mtype = (default_policy == cache_policy_coherent) ? |
109 | MTYPE_CC : |
110 | MTYPE_NC; |
111 | |
112 | ape1_mtype = (alternate_policy == cache_policy_coherent) ? |
113 | MTYPE_CC : |
114 | MTYPE_NC; |
115 | |
116 | qpd->sh_mem_config = (qpd->sh_mem_config & |
117 | SH_MEM_CONFIG__ADDRESS_MODE_MASK) | |
118 | SH_MEM_ALIGNMENT_MODE_UNALIGNED << |
119 | SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | |
120 | default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | |
121 | ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT | |
122 | SH_MEM_CONFIG__PRIVATE_ATC_MASK; |
123 | |
124 | return true; |
125 | } |
126 | |
127 | static bool set_cache_memory_policy_vi_tonga(struct device_queue_manager *dqm, |
128 | struct qcm_process_device *qpd, |
129 | enum cache_policy default_policy, |
130 | enum cache_policy alternate_policy, |
131 | void __user *alternate_aperture_base, |
132 | uint64_t alternate_aperture_size) |
133 | { |
134 | uint32_t default_mtype; |
135 | uint32_t ape1_mtype; |
136 | |
137 | default_mtype = (default_policy == cache_policy_coherent) ? |
138 | MTYPE_UC : |
139 | MTYPE_NC; |
140 | |
141 | ape1_mtype = (alternate_policy == cache_policy_coherent) ? |
142 | MTYPE_UC : |
143 | MTYPE_NC; |
144 | |
145 | qpd->sh_mem_config = |
146 | SH_MEM_ALIGNMENT_MODE_UNALIGNED << |
147 | SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | |
148 | default_mtype << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | |
149 | ape1_mtype << SH_MEM_CONFIG__APE1_MTYPE__SHIFT; |
150 | |
151 | return true; |
152 | } |
153 | |
154 | static int update_qpd_vi(struct device_queue_manager *dqm, |
155 | struct qcm_process_device *qpd) |
156 | { |
157 | struct kfd_process_device *pdd; |
158 | unsigned int temp; |
159 | |
160 | pdd = qpd_to_pdd(qpd); |
161 | |
162 | /* check if sh_mem_config register already configured */ |
163 | if (qpd->sh_mem_config == 0) { |
164 | qpd->sh_mem_config = |
165 | SH_MEM_ALIGNMENT_MODE_UNALIGNED << |
166 | SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | |
167 | MTYPE_CC << SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | |
168 | MTYPE_CC << SH_MEM_CONFIG__APE1_MTYPE__SHIFT | |
169 | SH_MEM_CONFIG__PRIVATE_ATC_MASK; |
170 | |
171 | qpd->sh_mem_ape1_limit = 0; |
172 | qpd->sh_mem_ape1_base = 0; |
173 | } |
174 | |
175 | if (qpd->pqm->process->is_32bit_user_mode) { |
176 | temp = get_sh_mem_bases_32(pdd); |
177 | qpd->sh_mem_bases = temp << SH_MEM_BASES__SHARED_BASE__SHIFT; |
178 | qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA32 << |
179 | SH_MEM_CONFIG__ADDRESS_MODE__SHIFT; |
180 | } else { |
181 | temp = get_sh_mem_bases_nybble_64(pdd); |
182 | qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); |
183 | qpd->sh_mem_config |= SH_MEM_ADDRESS_MODE_HSA64 << |
184 | SH_MEM_CONFIG__ADDRESS_MODE__SHIFT; |
185 | qpd->sh_mem_config |= 1 << |
186 | SH_MEM_CONFIG__PRIVATE_ATC__SHIFT; |
187 | } |
188 | |
189 | pr_debug("is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n" , |
190 | qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases); |
191 | |
192 | return 0; |
193 | } |
194 | |
195 | static int update_qpd_vi_tonga(struct device_queue_manager *dqm, |
196 | struct qcm_process_device *qpd) |
197 | { |
198 | struct kfd_process_device *pdd; |
199 | unsigned int temp; |
200 | |
201 | pdd = qpd_to_pdd(qpd); |
202 | |
203 | /* check if sh_mem_config register already configured */ |
204 | if (qpd->sh_mem_config == 0) { |
205 | qpd->sh_mem_config = |
206 | SH_MEM_ALIGNMENT_MODE_UNALIGNED << |
207 | SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT | |
208 | MTYPE_UC << |
209 | SH_MEM_CONFIG__DEFAULT_MTYPE__SHIFT | |
210 | MTYPE_UC << |
211 | SH_MEM_CONFIG__APE1_MTYPE__SHIFT; |
212 | |
213 | qpd->sh_mem_ape1_limit = 0; |
214 | qpd->sh_mem_ape1_base = 0; |
215 | } |
216 | |
217 | /* On dGPU we're always in GPUVM64 addressing mode with 64-bit |
218 | * aperture addresses. |
219 | */ |
220 | temp = get_sh_mem_bases_nybble_64(pdd); |
221 | qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp); |
222 | |
223 | pr_debug("sh_mem_bases nybble: 0x%X and register 0x%X\n" , |
224 | temp, qpd->sh_mem_bases); |
225 | |
226 | return 0; |
227 | } |
228 | |
229 | static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q, |
230 | struct qcm_process_device *qpd) |
231 | { |
232 | uint32_t value = (1 << SDMA0_RLC0_VIRTUAL_ADDR__ATC__SHIFT); |
233 | |
234 | if (q->process->is_32bit_user_mode) |
235 | value |= (1 << SDMA0_RLC0_VIRTUAL_ADDR__PTR32__SHIFT) | |
236 | get_sh_mem_bases_32(qpd_to_pdd(qpd)); |
237 | else |
238 | value |= ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << |
239 | SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & |
240 | SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; |
241 | |
242 | q->properties.sdma_vm_addr = value; |
243 | } |
244 | |
245 | static void init_sdma_vm_tonga(struct device_queue_manager *dqm, |
246 | struct queue *q, |
247 | struct qcm_process_device *qpd) |
248 | { |
249 | /* On dGPU we're always in GPUVM64 addressing mode with 64-bit |
250 | * aperture addresses. |
251 | */ |
252 | q->properties.sdma_vm_addr = |
253 | ((get_sh_mem_bases_nybble_64(qpd_to_pdd(qpd))) << |
254 | SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE__SHIFT) & |
255 | SDMA0_RLC0_VIRTUAL_ADDR__SHARED_BASE_MASK; |
256 | } |
257 | |